diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 62a571b..871207d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -34,6 +34,12 @@ jobs:
         run: ./gradlew :shared:check --no-daemon
 
       - name: Build Android debug APK
+        env:
+          # Optional Hugging Face read-token baked into the APK at build
+          # time; needed to download gated LiteRT-LM / Gemma model bundles
+          # without the user pasting one into Settings. Repository secret;
+          # build still succeeds if absent (token defaults to empty).
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: ./gradlew :androidApp:assembleDebug --no-daemon
 
       - name: Resolve APK artifact name
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 7c13f0c..9bf031c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -49,6 +49,11 @@ jobs:
           ANDROID_STORE_PASSWORD: ${{ secrets.ANDROID_STORE_PASSWORD }}
           ANDROID_KEY_ALIAS: ${{ secrets.ANDROID_KEY_ALIAS }}
           ANDROID_KEY_PASSWORD: ${{ secrets.ANDROID_KEY_PASSWORD }}
+          # Optional Hugging Face read-token baked into the APK so gated
+          # LiteRT-LM / Gemma bundles can be downloaded without the user
+          # pasting a token. Empty / absent secret leaves the default empty
+          # and the user is prompted in Settings.
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           ./gradlew :androidApp:assembleRelease --no-daemon \
             -PappVersion=${{ steps.version.outputs.version }} \
diff --git a/README.md b/README.md
index 2b18181..0bbdb93 100644
--- a/README.md
+++ b/README.md
@@ -111,6 +111,46 @@ Per-platform repository implementations:
 ./gradlew :shared:build                 # Build the KMP library
 ```
 
+### Hugging Face Token (gated models)
+
+Most LiteRT-LM models in the catalog (Gemma 3, Gemma 4, FunctionGemma) are *gated* on Hugging Face — the API will reject downloads with HTTP 401 (no valid token) or 403 (licence not yet accepted) until two things are true:
+
+1. You hold a Hugging Face access token with **read** scope. Create one at <https://huggingface.co/settings/tokens>.
+2. You've accepted each model's licence on its HF page (e.g. <https://huggingface.co/google/gemma-3-1b-it>). Acceptance is per-repo and is a one-time click on the web UI.
+
+URLVault accepts the token from three sources, in this order of precedence:
+
+1. **User-entered** — Settings → Local AI Models → "Hugging Face token". Stored in `EncryptedSharedPreferences` on the device. Best for personal builds.
+2. **Build-time `HF_TOKEN` env var** — read by `androidApp/build.gradle.kts` and exposed as `BuildConfig.HF_TOKEN_DEFAULT`. Used by CI.
+3. **Build-time `hfToken` in `local.properties`** — same destination, fallback when the env var is absent. Used by local developer builds.
+
+The Settings row reads "Using token bundled with this build" when source 2 or 3 is present and the user hasn't entered one of their own.
+
+#### Local developer builds
+
+Add a single line to `local.properties` at the repo root (already in `.gitignore` — the token never leaves your machine):
+
+```properties
+hfToken=hf_xxxxxxxxxxxxxxxxxxxx
+```
+
+After that, `./gradlew :androidApp:assembleDebug` and `./gradlew :androidApp:installDebug` will pick the token up automatically. Or set the env var per-invocation:
+
+```bash
+HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx ./gradlew :androidApp:assembleDebug
+```
+
+#### CI builds
+
+Add `HF_TOKEN` as a repository secret on GitHub:
+
+- *Settings → Secrets and variables → Actions → New repository secret*, name `HF_TOKEN`.
+- The existing `build.yml` and `release.yml` workflows already read it. With the secret absent, builds still succeed and the APK ships with the field empty (the user is prompted in Settings).
+
+> **Security note.** Anything baked into the APK can be recovered by reverse-engineering. Only ship a *read-only* token that is acceptable for the people who will install the build. The user-entered path stores the token in EncryptedSharedPreferences (Android Keystore-wrapped) and is the safer default for shared / public builds.
+
+The downloader scrubs the `Authorization` header on cross-origin redirects (HF 302s gated downloads to a pre-signed CDN URL on `cas-bridge.xethub.hf.co`, which would otherwise reject the extra header with 401), so the token only travels to `huggingface.co` itself.
+
 ### iOS
 
 1. Open `iosApp/iosApp.xcodeproj` in Xcode
diff --git a/androidApp/build.gradle.kts b/androidApp/build.gradle.kts
index 5398330..c669085 100644
--- a/androidApp/build.gradle.kts
+++ b/androidApp/build.gradle.kts
@@ -1,6 +1,7 @@
 import org.jetbrains.kotlin.gradle.dsl.JvmTarget
 import java.nio.file.Files
 import java.util.Base64
+import java.util.Properties
 
 plugins {
     alias(libs.plugins.android.application)
@@ -42,6 +43,31 @@ android {
 
         testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
 
+        // Optional Hugging Face read-token, baked into the APK at build time
+        // so the downloader can fetch gated LiteRT-LM bundles without the
+        // user pasting a token. Two sources, in order of precedence:
+        //   1. HF_TOKEN env var — used by CI (GitHub Actions secret). Blank
+        //      counts as absent: Actions exports "" for an unset secret.
+        //   2. `hfToken` property in <repo>/local.properties — used for local
+        //      developer builds; gitignored, so it never leaves the machine.
+        // Empty default lets the build succeed without either; the user can
+        // paste a token into the Settings screen instead.
+        // Whitespace and any non-token characters are stripped to keep the
+        // generated string literal safe — real HF tokens are alphanumeric
+        // with `_` / `-`. Note: anything baked into the APK is recoverable
+        // via reverse engineering — only ship a *read-only* HF token here.
+        val hfTokenFromLocalProps: String? = rootProject.file("local.properties")
+            .takeIf { it.exists() }
+            ?.let { f ->
+                val props = Properties()
+                f.inputStream().use { stream -> props.load(stream) }
+                props.getProperty("hfToken")
+            }
+        val hfTokenDefault = (System.getenv("HF_TOKEN")?.takeIf { it.isNotBlank() }
+            ?: hfTokenFromLocalProps.orEmpty()).trim()
+            .filter { it.isLetterOrDigit() || it == '_' || it == '-' }
+        buildConfigField("String", "HF_TOKEN_DEFAULT", "\"$hfTokenDefault\"")
+
         // Llamatik ships native libs for arm64-v8a, armeabi-v7a, x86, x86_64.
         // libllama_jni.so alone is ~23 MB per ABI; restricting to arm64-v8a cuts
         // ~90 MB of unused code from the APK. Every supported Android device
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt
index 6c02822..09c9607 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt
@@ -2,11 +2,14 @@ package com.jaeckel.urlvault.android
 
 import android.content.Intent
 import android.os.Bundle
-import android.widget.Toast
 import androidx.activity.ComponentActivity
+import androidx.activity.compose.BackHandler
 import androidx.activity.compose.setContent
 import androidx.activity.enableEdgeToEdge
+import androidx.compose.foundation.layout.Box
 import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.navigationBarsPadding
 import androidx.compose.foundation.layout.statusBarsPadding
 import androidx.compose.ui.Modifier
 import androidx.compose.runtime.LaunchedEffect
@@ -16,6 +19,7 @@ import androidx.compose.runtime.mutableStateOf
 import androidx.compose.runtime.produceState
 import androidx.compose.runtime.remember
 import androidx.compose.runtime.setValue
+import kotlinx.coroutines.delay
 import com.jaeckel.urlvault.ai.AiProviderIds
 import com.jaeckel.urlvault.ai.ModelCatalog
 import com.jaeckel.urlvault.ai.ModelCatalogEntry
@@ -30,6 +34,8 @@ import com.jaeckel.urlvault.android.sync.AndroidBitwardenPreferences
 import com.jaeckel.urlvault.model.Bookmark
 import com.jaeckel.urlvault.sync.BitwardenSyncService
 import com.jaeckel.urlvault.ui.AddEditBookmarkScreen
+import com.jaeckel.urlvault.ui.AiActivityState
+import com.jaeckel.urlvault.ui.AiActivityStatusLine
 import com.jaeckel.urlvault.ui.BookmarkListScreen
 import com.jaeckel.urlvault.ui.ModelComparisonScreen
 import com.jaeckel.urlvault.ui.ModelStatusBanner
@@ -85,6 +91,11 @@ class MainActivity : ComponentActivity() {
                 val warmingIds by localModelRouter.warmingIds.collectAsState()
                 var customEntries by remember { mutableStateOf(localModelPrefs.loadCustomEntries()) }
                 var activeIds by remember { mutableStateOf(localModelPrefs.loadActiveIds()) }
+                // The user-only token (the build-time fallback isn't shown as
+                // a saved value — the row says "Using token bundled with this
+                // build" instead).
+                var hfToken by remember { mutableStateOf(localModelPrefs.loadUserHfToken().orEmpty()) }
+                val hfTokenFromBuild = remember { localModelPrefs.hasBuildTimeHfToken() }
                 // Settings reads two heavy values from EncryptedSharedPreferences:
                 // the Bitwarden credentials (decrypts via Keystore) and the
                 // field-history blob. Cache them in remembered state and only
@@ -99,28 +110,47 @@ class MainActivity : ComponentActivity() {
                     aiCoreService.initialize()
                 }
 
-                // DEBUG-only: surface which provider actually served each AI call
-                // so we can confirm an "activated" model is what's being used vs.
-                // silently falling back to AICore.
+                // DEBUG-only: surface which provider actually served each AI
+                // call (and how long it took) in a thin auto-hiding strip at
+                // the bottom of the screen. Replaces a much louder Toast that
+                // obscured the form while the user was trying to interact
+                // with it.
+                var aiActivity by remember { mutableStateOf<AiActivityState>(AiActivityState.Hidden) }
                 if (BuildConfig.DEBUG) {
                     LaunchedEffect(Unit) {
                         localModelRouter.events.collect { event ->
-                            val readinessLine = event.readiness.joinToString { (id, r) ->
-                                "${id.substringAfter(':')}=${if (r) "✓" else "✗"}"
-                            }
-                            val activeLine = if (event.activeIds.isEmpty()) "active=none"
-                                else "active=${event.activeIds.joinToString { it.substringAfter(':') }}"
-                            val head = when (event) {
+                            aiActivity = when (event) {
                                 is LocalModelRouter.RouteEvent.Picked ->
-                                    "AI ${event.action}: ${event.providerName}\n${event.reason}"
+                                    AiActivityState.Running(event.action, event.providerName)
+                                is LocalModelRouter.RouteEvent.Completed ->
+                                    AiActivityState.Completed(
+                                        action = event.action,
+                                        providerName = event.providerName,
+                                        durationMs = event.durationMs,
+                                        success = event.success,
+                                    )
                                 is LocalModelRouter.RouteEvent.None ->
-                                    "AI ${event.action}: NO PROVIDER\n${event.reason}"
+                                    AiActivityState.NoProvider(event.action, event.reason)
                             }
-                            val text = "$head\n$activeLine\n$readinessLine"
-                            Toast.makeText(this@MainActivity, text, Toast.LENGTH_LONG).show()
                         }
                     }
                 }
+                // Auto-hide once the user has had time to read the result.
+                // Running stays visible for as long as the LLM is working
+                // (we only transition out of it when Completed/None arrive).
+                LaunchedEffect(aiActivity) {
+                    when (aiActivity) {
+                        is AiActivityState.Completed -> {
+                            delay(3_500)
+                            aiActivity = AiActivityState.Hidden
+                        }
+                        is AiActivityState.NoProvider -> {
+                            delay(5_000)
+                            aiActivity = AiActivityState.Hidden
+                        }
+                        else -> {}
+                    }
+                }
 
                 // Show toggle for any status except Unknown (still probing)
                 val aiCoreAvailable = aiCoreStatus !is AICoreStatus.Unknown && aiCoreStatus !is AICoreStatus.Unavailable
@@ -152,11 +182,34 @@ class MainActivity : ComponentActivity() {
                     }
                 }
 
+                // Without an explicit BackHandler, the system back gesture
+                // bypasses our in-memory `currentScreen` state and finishes
+                // the Activity — i.e. tapping back from Settings exits the
+                // app instead of returning to the bookmark list. Mirror the
+                // in-screen back arrows: Comparison → Settings; Settings and
+                // AddEdit → List. List is the root, so the handler is
+                // disabled there and the OS default (finish) applies.
+                BackHandler(enabled = currentScreen !is Screen.List) {
+                    currentScreen = when (currentScreen) {
+                        is Screen.Comparison -> Screen.Settings
+                        is Screen.Settings, is Screen.AddEdit -> Screen.List
+                        is Screen.List -> Screen.List // unreachable
+                    }
+                }
+
                 Column(
-                    // enableEdgeToEdge() lets content draw under the status
-                    // bar; without statusBarsPadding the banner would land
-                    // behind the system clock / battery icons.
-                    modifier = Modifier.statusBarsPadding(),
+                    // enableEdgeToEdge() lets content draw under the system
+                    // bars; the two *barsPadding modifiers reserve space at
+                    // top and bottom AND consume the corresponding insets so
+                    // descendants (notably the screens' Material Scaffolds
+                    // with BottomAppBar) don't double-pad. Without this, the
+                    // BottomAppBar kept its own gesture-pill padding even
+                    // when the AI activity strip slid in below it, making
+                    // the button row's box visibly grow.
+                    modifier = Modifier
+                        .fillMaxSize()
+                        .statusBarsPadding()
+                        .navigationBarsPadding(),
                 ) {
                     // Persistent status banner — surfaces the active model
                     // warming up or any in-flight download regardless of which
@@ -169,6 +222,12 @@ class MainActivity : ComponentActivity() {
                         catalog = ModelCatalog.builtIn + customEntries,
                         aiCoreId = AiProviderIds.AICORE,
                     )
+                    // Wrap the active screen in a weighted Box so the AI
+                    // activity strip below can claim its natural height
+                    // without overlapping the screen's own bottom buttons —
+                    // when the strip is visible the screen's available
+                    // height shrinks and its Save / Cancel row reflows up.
+                    Box(modifier = Modifier.weight(1f).fillMaxSize()) {
                 when (val screen = currentScreen) {
                     is Screen.List -> BookmarkListScreen(
                         viewModel = bookmarkViewModel,
@@ -268,6 +327,12 @@ class MainActivity : ComponentActivity() {
                                 // generate() call doesn't pay model-load cost.
                                 if (active) appScope.launch { localModelRouter.warmUpActive() }
                             },
+                            hfToken = hfToken,
+                            hfTokenFromBuild = hfTokenFromBuild,
+                            onHfTokenChanged = { newToken ->
+                                hfToken = newToken
+                                localModelPrefs.saveHfToken(newToken)
+                            },
                             onAddCustomModel = { hfRepo, hfFile, displayName ->
                                 val newEntry = ModelCatalogEntry(
                                     id = "custom:" + hfRepo.lowercase().replace('/', '_') + ":" + hfFile.lowercase(),
@@ -302,7 +367,19 @@ class MainActivity : ComponentActivity() {
                         )
                     }
                 }
-                }   // close Column wrapping the banner + screen content
+                }   // close weighted Box wrapping the screen
+
+                    // DEBUG-only AI activity strip. Last child of the Column
+                    // so when AnimatedVisibility expands it from 0-height
+                    // the screen above is pushed up — its Save button stays
+                    // visible. The outer Column already consumed the nav
+                    // bar inset, so the strip needs no padding of its own.
+                    if (BuildConfig.DEBUG) {
+                        AiActivityStatusLine(
+                            state = aiActivity,
+                        )
+                    }
+                }   // close outer Column
             }
         }
     }
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt
index 8a974f5..0b4dc6f 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt
@@ -288,13 +288,26 @@ class AICoreService(httpClient: HttpClient) {
 
     /**
      * Generate a 1-2 sentence description for a bookmark.
-     * Fetches the web page to provide context for an accurate description.
+     *
+     * Same shape as [generateTitle]: if the page itself carries a
+     * publisher-written summary (`<meta property="og:description">` or
+     * `<meta name="description">`), return it verbatim — the LLM can't beat
+     * what the author wrote about their own page, and burning a Gemini Nano
+     * call to "rewrite" an existing 1-2 sentence summary is wasted work
+     * that often degrades the result. The LLM only fires for pages with no
+     * metadata-provided description, where genuine extraction from
+     * `visibleText` is needed.
      */
     suspend fun generateDescription(url: String, title: String): Result<String> {
         return runCatching {
             val pageContent = fetchPageContent(url)
-            val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH) ?: ""
 
+            val nativeDesc = pageContent?.let { it.ogDescription ?: it.metaDescription }
+            if (!nativeDesc.isNullOrBlank()) {
+                return@runCatching validateDescription(nativeDesc.trim())
+            }
+
+            val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH)
             val prompt = buildString {
                 appendLine("Write a 1-2 sentence factual description for this bookmark.")
                 appendLine("Return ONLY the description, nothing else.")
@@ -305,15 +318,12 @@ class AICoreService(httpClient: HttpClient) {
                     appendLine("Title: $title")
                 }
                 if (pageSummary.isNotBlank()) {
-                    appendLine("Page summary: $pageSummary")
+                    appendLine("Page text: $pageSummary")
                 } else {
                     appendLine("If you cannot determine what the page is about, respond with: Unable to generate description.")
                 }
             }
-            
-            // See generateTags() — inline runBenchmarking removed for the
-            // same reason; explicit comparison lives in
-            // ModelComparisonScreen.
+
             validateDescription(runInference(prompt).trim())
         }
     }
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt
index 1627861..bda1bd9 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt
@@ -114,7 +114,24 @@ class LeapModelProvider(
 
     override suspend fun generateDescription(url: String, title: String): Result<String> = runCatching {
         val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull()
-        val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty()
+
+        // Short-circuit on a page-provided description — same shape as
+        // generateTitle. Two reasons this matters specifically for LFM2-
+        // Extract:
+        //  - it's an *extraction* fine-tune, not a generation one. Asking
+        //    it to rewrite an already-good summary just wastes a model
+        //    call;
+        //  - on pages where the supplied text has nothing extractable, the
+        //    grammar's `minLength: 1` cornering produces degenerate
+        //    sequences like `:","",..."` (see `looksDegenerate`). Skipping
+        //    the LLM entirely when a usable description is already
+        //    available eliminates that failure mode for those pages.
+        val nativeDesc = pageContent?.let { it.ogDescription ?: it.metaDescription }
+        if (!nativeDesc.isNullOrBlank()) {
+            return@runCatching validateDescription(nativeDesc.trim())
+        }
+
+        val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH)
 
         val schema = """
             {
@@ -135,9 +152,21 @@ class LeapModelProvider(
         // this as "extract a summary from the supplied text" rather than
         // "write a description"; otherwise the model has nothing to extract,
         // the grammar still forces a non-empty string, and we get garbage
-        // (the original prompt produced a single-comma description).
+        // (the original prompt produced a single-comma description; a later
+        // observed regression produced `{"description":":\",\",..."}` —
+        // valid JSON shape, garbage value, when supplied text was thin).
+        // Defences against that mode:
+        //   - state explicitly that real natural-language sentences are
+        //     required and that punctuation-only output is wrong;
+        //   - give the model a concrete fallback to emit when there's
+        //     nothing to extract, so it doesn't have to invent garbage to
+        //     satisfy the grammar.
+        // The provider also rejects degenerate output post-hoc — see
+        // `looksDegenerate`.
         val task = buildString {
-            appendLine("Extract a 1-2 sentence summary describing what the web page below is about. Use only information present in the supplied text.")
+            appendLine("Extract a 1-2 sentence summary describing what the web page below is about, using only information present in the supplied text.")
+            appendLine("The summary must be real English (or German) sentences with normal words and spaces — never punctuation-only output.")
+            appendLine("If the supplied text does not contain enough information to summarise, return exactly: No summary available.")
             appendLine()
             appendLine("URL: $url")
             if (title.isNotBlank()) appendLine("Title: $title")
@@ -145,9 +174,9 @@ class LeapModelProvider(
                 appendLine("Page content:")
                 appendLine(pageSummary)
             } else {
-                // No page content fetched — give the model something concrete
-                // to extract from rather than asking it to invent prose.
-                appendLine("Page content: (unavailable — derive a one-sentence summary from the URL and title only)")
+                // No page content fetched — explicitly authorise the
+                // canonical fallback rather than asking for invented prose.
+                appendLine("Page content: (unavailable — return: No summary available.)")
             }
             appendLine()
             appendLine("Return the extracted summary as the \"description\" field.")
@@ -159,7 +188,12 @@ class LeapModelProvider(
         }
         Log.i(TAG, "[$id] description raw: $raw")
 
-        validateDescription(parseJson<DescriptionExtraction>(raw).description.trim())
+        val text = parseJson<DescriptionExtraction>(raw).description.trim()
+        if (looksDegenerate(text)) {
+            Log.w(TAG, "[$id] description rejected as degenerate: ${text.take(80)}")
+            error("Model produced degenerate output (no extractable content)")
+        }
+        validateDescription(text)
     }
 
     override suspend fun generateTitle(url: String): Result<String> = runCatching {
@@ -203,7 +237,33 @@ class LeapModelProvider(
         }
         Log.i(TAG, "[$id] title raw: $raw")
 
-        parseJson<TitleExtraction>(raw).title.trim().removeSurrounding("\"")
+        val text = parseJson<TitleExtraction>(raw).title.trim().removeSurrounding("\"")
+        if (looksDegenerate(text)) {
+            Log.w(TAG, "[$id] title rejected as degenerate: ${text.take(80)}")
+            error("Model produced degenerate output (no extractable content)")
+        }
+        text
+    }
+
+    /**
+     * Heuristic to catch the LFM2-Extract failure mode where the grammar-
+     * constrained sampler forces a non-empty string but the supplied text
+     * has nothing to extract — the model fills the budget with degenerate
+     * sequences like `:","","",...`. JSON shape is valid; value is garbage.
+     *
+     * Real natural-language output is mostly letters with reasonable
+     * character diversity. Reject anything under 5 chars or failing either
+     * bar, so the UI surfaces "AI generation failed", not saved garbage.
+     */
+    private fun looksDegenerate(text: String): Boolean {
+        val trimmed = text.trim()
+        if (trimmed.length < 5) return true
+        val letterCount = trimmed.count { it.isLetter() }
+        val letterRatio = letterCount.toDouble() / trimmed.length
+        if (letterRatio < 0.4) return true
+        val distinctChars = trimmed.toSet().size
+        if (distinctChars < 5) return true
+        return false
     }
 
     /**
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt
index 77a1175..f5fd9c0 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt
@@ -40,6 +40,15 @@ class LiteRtLmModelProvider(
 
     override suspend fun isReady(): Boolean = bridge.isAvailable()
 
+    /**
+     * Backend the SDK ended up loading on (`"NPU"` / `"GPU"` / `"CPU"`),
+     * or null if no model is loaded yet. Read by `LocalModelRouter` to
+     * enrich the debug provenance tag — the saved bookmark then carries
+     * `liteRt[GPU]:gemma-3-1b-it-int4:2.34s` so it's obvious at a glance
+     * whether NPU/GPU acceleration was actually in play.
+     */
+    fun currentBackendLabel(): String? = bridge.currentBackendLabel()
+
     override suspend fun preload() {
         // Same mutex as the generate path so an inference call can't race a
         // warm-up into the LiteRT-LM Engine constructor.
@@ -115,7 +124,20 @@ class LiteRtLmModelProvider(
 
     override suspend fun generateDescription(url: String, title: String): Result<String> = runCatching {
         val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull()
-        val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty()
+
+        // Short-circuit on a page-provided description — same shape as
+        // generateTitle. Most pages carry a publisher-written
+        // og:description / <meta name="description"> already optimised for
+        // social-card / SERP display; the LLM rewrite is wasted work and
+        // on Tensor CPU here it costs ~1–5 seconds per call. Return the
+        // page's own text instead. The model fires only when the page has
+        // no metadata-provided description.
+        val nativeDesc = pageContent?.let { it.ogDescription ?: it.metaDescription }
+        if (!nativeDesc.isNullOrBlank()) {
+            return@runCatching validateDescription(nativeDesc.trim())
+        }
+
+        val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH)
 
         val example = """{"description": "A Kotlin Multiplatform tutorial covering shared UI with Compose."}"""
 
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt
index 8ee427e..2e4e8eb 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt
@@ -18,6 +18,14 @@ interface LiteRtLmNativeBridge {
     /** Whether LiteRT-LM loaded successfully and the device can run inference. */
     fun isAvailable(): Boolean
 
+    /**
+     * Label for the currently loaded backend (`"NPU"` / `"GPU"` / `"CPU"`),
+     * or null if no model is loaded. Surfaced in the debug provenance tag
+     * so the saved bookmark answers "did it run on NPU/GPU/CPU?" at a
+     * glance, without having to dig through logcat.
+     */
+    fun currentBackendLabel(): String? = null
+
     /**
      * Loads the `.litertlm` bundle at [absolutePath] into memory. Idempotent
      * per path: a repeated call with the same path is a no-op; a different
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt
index 9a04063..9235376 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt
@@ -30,9 +30,23 @@ fun interface LiteRtLmBackendStrategy {
 
 /**
  * NPU first when the device's `nativeLibraryDir` is non-blank (vendor libs
- * are loaded from there for QCS / Pixel chips), then GPU, then CPU. On
- * unsupported devices the NPU init throws and `load()` falls through to
- * the next backend.
+ * are loaded from there for QCS / Pixel chips), then **CPU**, then GPU.
+ *
+ * GPU is intentionally last. On every Pixel Tensor we've tested (G2 on
+ * Pixel 7a, G5 on Pixel 10 Pro Fold) the GPU engine loads but the first
+ * generate call throws `Can not find OpenCL library on this device` —
+ * LiteRT-LM 0.10.x auto-selects an OpenCL Top-K sampler from the engine
+ * backend and Tensor doesn't ship OpenCL drivers. The SDK has no public
+ * knob to use the CPU sampler with a GPU engine, so on Tensor the only
+ * way to get a working sampler is to run the engine on CPU too. Putting
+ * CPU before GPU avoids a wasted ~5–10 s GPU load + failed generate
+ * cycle on every cold start on those devices.
+ *
+ * Cost: on a hypothetical device with working OpenCL drivers we'd miss
+ * the GPU speedup. We don't currently have such a test device and the
+ * "correct on Tensor" trade is much more important. The runtime
+ * self-heal in [LiteRtLmSdkBridge.runCollect] still catches the OpenCL
+ * error if a custom strategy puts GPU first.
  */
 object DefaultBackendStrategy : LiteRtLmBackendStrategy {
     override fun candidates(nativeLibDir: String): List<Pair<String, Backend>> {
@@ -40,9 +54,9 @@ object DefaultBackendStrategy : LiteRtLmBackendStrategy {
         if (nativeLibDir.isNotBlank()) {
             list.add("NPU" to Backend.NPU(nativeLibDir))
         }
-        list.add("GPU" to Backend.GPU())
         // null = default thread count picked by the runtime.
         list.add("CPU" to Backend.CPU(null))
+        list.add("GPU" to Backend.GPU())
         return list
     }
 }
@@ -70,6 +84,16 @@ class LiteRtLmSdkBridge(
     private var currentPath: String? = null
     private var currentBackend: String? = null
 
+    /**
+     * Backends that *initialised successfully* but then failed at runtime
+     * during `generateContent` (e.g. Pixel 7a's Tensor G2 GPU loads fine but
+     * the Top-K sampler tries to dlopen OpenCL and the Tensor stack has none,
+     * so `runCollect` throws `Can not find OpenCL library on this device`).
+     * Filtered out of subsequent loads in this process so the bridge doesn't
+     * keep redoing the same dance every call. Cleared on app process death.
+     */
+    private val runtimeBlockedBackends = mutableSetOf<String>()
+
     private val classLoaderProbe: Boolean by lazy {
         try {
             Class.forName("com.google.ai.edge.litertlm.Engine")
@@ -83,65 +107,93 @@ class LiteRtLmSdkBridge(
 
     override fun isAvailable(): Boolean = classLoaderProbe
 
+    override fun currentBackendLabel(): String? = currentBackend
+
     override suspend fun load(absolutePath: String) {
         mutex.withLock {
             if (currentPath == absolutePath && engine != null) {
                 Log.v(TAG, "load: already loaded $absolutePath, no-op")
                 return
             }
-            withContext(Dispatchers.IO) {
-                engine?.let {
-                    Log.i(TAG, "load: switching model — closing previous $currentPath")
-                    runCatching { it.close() }
-                }
-                engine = null
-                currentPath = null
-                currentBackend = null
+            loadInternalLocked(absolutePath)
+        }
+    }
 
-                val cacheDir = File(context.cacheDir, "litertlm").also { it.mkdirs() }
-                val nativeLibDir = context.applicationInfo.nativeLibraryDir.orEmpty()
-                val backendsToTry = backendStrategy.candidates(nativeLibDir)
-
-                var lastError: Throwable? = null
-                for ((label, backend) in backendsToTry) {
-                    val t0 = System.currentTimeMillis()
-                    Log.i(TAG, "load: trying backend=$label for $absolutePath")
-                    val candidate = Engine(
-                        EngineConfig(
-                            modelPath = absolutePath,
-                            backend = backend,
-                            visionBackend = backend,
-                            audioBackend = backend,
-                            maxNumTokens = null,
-                            maxNumImages = null,
-                            cacheDir = cacheDir.absolutePath,
-                        ),
-                    )
-                    val initOk = runCatching { candidate.initialize() }
-                    if (initOk.isSuccess) {
-                        engine = candidate
-                        currentPath = absolutePath
-                        currentBackend = label
-                        Log.i(
-                            TAG,
-                            "load: ready on $label in ${System.currentTimeMillis() - t0}ms — $absolutePath",
-                        )
-                        return@withContext
-                    } else {
-                        lastError = initOk.exceptionOrNull()
-                        Log.w(
-                            TAG,
-                            "load: backend=$label failed (${lastError?.message}); trying next",
-                        )
-                        runCatching { candidate.close() }
-                    }
-                }
-                val tried = backendsToTry.joinToString(" → ") { it.first }
+    /**
+     * Same logic as [load] but assumes the caller already holds [mutex].
+     * Tries each strategy candidate not in [runtimeBlockedBackends], in
+     * order, and keeps the first whose `initialize()` succeeds. [runCollect]
+     * only blocklists a failed backend; the reload happens on the next [load].
+     */
+    private suspend fun loadInternalLocked(absolutePath: String) {
+        withContext(Dispatchers.IO) {
+            engine?.let {
+                Log.i(TAG, "load: switching model — closing previous $currentPath")
+                runCatching { it.close() }
+            }
+            engine = null
+            currentPath = null
+            currentBackend = null
+
+            val cacheDir = File(context.cacheDir, "litertlm").also { it.mkdirs() }
+            val nativeLibDir = context.applicationInfo.nativeLibraryDir.orEmpty()
+            val backendsToTry = backendStrategy.candidates(nativeLibDir)
+                .filterNot { (label, _) -> label in runtimeBlockedBackends }
+
+            if (backendsToTry.isEmpty()) {
                 throw IllegalStateException(
-                    "LiteRT-LM failed on every backend ($tried). Last error: ${lastError?.message}",
-                    lastError,
+                    "LiteRT-LM has no usable backends left for this session " +
+                        "(all blocked by prior runtime failures: $runtimeBlockedBackends)",
                 )
             }
+
+            var lastError: Throwable? = null
+            for ((label, backend) in backendsToTry) {
+                val t0 = System.currentTimeMillis()
+                Log.i(TAG, "load: trying backend=$label for $absolutePath")
+                // visionBackend / audioBackend left null: every entry in
+                // ModelCatalog is text-only. Setting them to `backend`
+                // tells the engine to enable those modalities, and
+                // initialize() then fails with `NOT_FOUND:
+                // TF_LITE_VISION_ENCODER not found in the model.` for
+                // text-only bundles (FunctionGemma 270M, Gemma 3 270M,
+                // Qwen3 0.6B, etc.). When a true multi-modal Gemma 4 E2B
+                // bundle is added later, switch this on per-entry.
+                val candidate = Engine(
+                    EngineConfig(
+                        modelPath = absolutePath,
+                        backend = backend,
+                        visionBackend = null,
+                        audioBackend = null,
+                        maxNumTokens = null,
+                        maxNumImages = null,
+                        cacheDir = cacheDir.absolutePath,
+                    ),
+                )
+                val initOk = runCatching { candidate.initialize() }
+                if (initOk.isSuccess) {
+                    engine = candidate
+                    currentPath = absolutePath
+                    currentBackend = label
+                    Log.i(
+                        TAG,
+                        "load: ready on $label in ${System.currentTimeMillis() - t0}ms — $absolutePath",
+                    )
+                    return@withContext
+                } else {
+                    lastError = initOk.exceptionOrNull()
+                    Log.w(
+                        TAG,
+                        "load: backend=$label failed (${lastError?.message}); trying next",
+                    )
+                    runCatching { candidate.close() }
+                }
+            }
+            val tried = backendsToTry.joinToString(" → ") { it.first }
+            throw IllegalStateException(
+                "LiteRT-LM failed on every backend ($tried). Last error: ${lastError?.message}",
+                lastError,
+            )
         }
     }
 
@@ -168,6 +220,43 @@ class LiteRtLmSdkBridge(
     }
 
     private suspend fun runCollect(text: String, maxTokens: Int): String {
+        return try {
+            runCollectOnce(text, maxTokens)
+        } catch (t: Throwable) {
+            // Pixel 7a / Tensor G2: the GPU backend initialises fine but
+            // generation throws `Can not find OpenCL library on this device`
+            // because LiteRT-LM's Top-K sampler dlopens OpenCL even on the
+            // WebGPU path. Blocklist that backend so the *next* call reloads
+            // on the remaining strategy candidates (typically CPU).
+            //
+            // We deliberately do NOT reload + retry inline here. `Engine.close()`
+            // doesn't release the GPU pipeline's native memory synchronously
+            // — observed on Pixel 7a, the in-flight reload of the CPU engine
+            // briefly held both pipelines in RAM and the process peaked at
+            // ~5.96 GB, well past Pixel 7a's effective per-app budget. The
+            // LMK reaped the app and the user saw an unexplained "LiteRT
+            // crashed the app" with no FATAL exception in logcat. Bailing
+            // out here keeps peak memory at 1× model and lets the very next
+            // entry-point call (provider.generateXxx → bridge.load) start
+            // from a clean slate with the blocklist already applied.
+            val brokenBackend = currentBackend
+            if (brokenBackend != null && isRecoverableRuntimeError(t)) {
+                Log.w(
+                    TAG,
+                    "Recovering from $brokenBackend runtime failure (${t.message?.take(120)}) — " +
+                        "blocklisting; next request will reload on remaining backends.",
+                )
+                runtimeBlockedBackends += brokenBackend
+                runCatching { engine?.close() }
+                engine = null
+                currentPath = null
+                currentBackend = null
+            }
+            throw t
+        }
+    }
+
+    private suspend fun runCollectOnce(text: String, maxTokens: Int): String {
         val current = engine ?: error("LiteRT-LM: no model loaded")
         // maxNumTokens here is advisory — the SDK still respects the config-
         // level cap. We pass through whatever sampling the user requests.
@@ -196,6 +285,16 @@ class LiteRtLmSdkBridge(
         }
     }
 
+    /**
+     * True when the engine initialised but generation failed because a
+     * runtime feature is missing on this device. The only known case so
+     * far is OpenCL missing on Pixel Tensor; extend the match as needed.
+     */
+    private fun isRecoverableRuntimeError(t: Throwable): Boolean {
+        val message = t.message.orEmpty().lowercase()
+        return listOf("opencl", "open cl").any { it in message }
+    }
+
     override suspend fun unload() {
         mutex.withLock {
             withContext(Dispatchers.IO) {
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt
index 7d1d4af..32fbd62 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt
@@ -77,8 +77,19 @@ class LlamaCppModelProvider(
 
     override suspend fun generateDescription(url: String, title: String): Result<String> = runCatching {
         val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull()
-        val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty()
 
+        // Same short-circuit as generateTitle: prefer the publisher's own
+        // description (og:description / <meta name="description">) over a
+        // model rewrite. The GGUF model is most useful when the page has
+        // *no* metadata-provided summary; otherwise we just spend several
+        // seconds rewriting a 1-2 sentence string into a slightly worse
+        // 1-2 sentence string.
+        val nativeDesc = pageContent?.let { it.ogDescription ?: it.metaDescription }
+        if (!nativeDesc.isNullOrBlank()) {
+            return@runCatching validateDescription(nativeDesc.trim())
+        }
+
+        val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH)
         val prompt = buildString {
             appendLine("Write a 1-2 sentence factual description for this bookmark.")
             appendLine("Return ONLY the description, nothing else.")
@@ -87,7 +98,7 @@ class LlamaCppModelProvider(
             appendLine("URL: $url")
             if (title.isNotBlank()) appendLine("Title: $title")
             if (pageSummary.isNotBlank()) {
-                appendLine("Page summary: $pageSummary")
+                appendLine("Page text: $pageSummary")
             } else {
                 appendLine("If you cannot determine what the page is about, respond with: Unable to generate description.")
             }
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt
index 303da5e..6627ecf 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt
@@ -5,6 +5,7 @@ import android.content.SharedPreferences
 import androidx.security.crypto.EncryptedSharedPreferences
 import androidx.security.crypto.MasterKey
 import com.jaeckel.urlvault.ai.ModelCatalogEntry
+import com.jaeckel.urlvault.android.BuildConfig
 import kotlinx.serialization.encodeToString
 import kotlinx.serialization.json.Json
 
@@ -51,7 +52,24 @@ class LocalModelPreferences(private val context: Context) {
         prefs.edit().putStringSet(KEY_ACTIVE_IDS, ids.toSet()).apply()
     }
 
-    fun loadHfToken(): String? = prefs.getString(KEY_HF_TOKEN, null)
+    /**
+     * Token the downloader should send. The user-saved token (see
+     * [loadUserHfToken]) wins; when that is missing or blank, falls back to
+     * [BuildConfig.HF_TOKEN_DEFAULT] so a CI build that injected `HF_TOKEN`
+     * can ship gated-model access without any user action.
+     *
+     * @return the effective token, or null when neither source has one.
+     */
+    fun loadHfToken(): String? =
+        loadUserHfToken() ?: BuildConfig.HF_TOKEN_DEFAULT.takeIf { it.isNotBlank() }
+
+    /**
+     * The literal user-entered value (without the build-time fallback) so the
+     * Settings UI can show "(none)" vs. "saved: hf_…" honestly. Use
+     * [loadHfToken] for the value the downloader should actually send.
+     */
+    fun loadUserHfToken(): String? =
+        prefs.getString(KEY_HF_TOKEN, null)?.takeIf { it.isNotBlank() }
 
     fun saveHfToken(token: String?) {
         prefs.edit().apply {
@@ -59,6 +77,9 @@ class LocalModelPreferences(private val context: Context) {
         }.apply()
     }
 
+    /** True iff the APK was built with a non-empty `HF_TOKEN` env var. */
+    fun hasBuildTimeHfToken(): Boolean = BuildConfig.HF_TOKEN_DEFAULT.isNotBlank()
+
     companion object {
         private const val PREFS_NAME = "urlvault_local_models_encrypted"
         private const val KEY_CUSTOM_ENTRIES = "custom_entries"
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt
index 5847593..ce2f491 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt
@@ -3,6 +3,9 @@ package com.jaeckel.urlvault.android.ai
 import android.util.Log
 import com.jaeckel.urlvault.ai.LocalModelProvider
 import com.jaeckel.urlvault.ai.LocalModelRegistry
+import com.jaeckel.urlvault.ai.ModelRuntime
+import com.jaeckel.urlvault.android.BuildConfig
+import kotlinx.coroutines.channels.BufferOverflow
 import kotlinx.coroutines.flow.MutableSharedFlow
 import kotlinx.coroutines.flow.MutableStateFlow
 import kotlinx.coroutines.flow.SharedFlow
@@ -51,9 +54,33 @@ class LocalModelRouter(
             override val readiness: List<Pair<String, Boolean>>,
             val reason: String,
         ) : RouteEvent()
+
+        /**
+         * Fired by `generateXxx` *after* the provider call returns or throws.
+         * Carries the elapsed duration in ms so a UI status line can show
+         * "tags via Liquid LFM2 Extract — 1247 ms". Note that for `title` on
+         * pages with a usable `<title>`/`og:title`, no LLM ran — duration
+         * reflects only the page fetch, which is intentional.
+         */
+        data class Completed(
+            override val action: String,
+            override val activeIds: Set<String>,
+            override val readiness: List<Pair<String, Boolean>>,
+            val providerId: String,
+            val providerName: String,
+            val durationMs: Long,
+            val success: Boolean,
+        ) : RouteEvent()
     }
 
-    private val _events = MutableSharedFlow<RouteEvent>(extraBufferCapacity = 16)
+    // DROP_OLDEST so a slow / backgrounded collector can never stall the
+    // generate path or silently lose the latest event. The UI only cares
+    // about *current* state, so dropping older Picked/Completed pairs is
+    // safer than letting tryEmit return false for the most recent one.
+    private val _events = MutableSharedFlow<RouteEvent>(
+        extraBufferCapacity = 16,
+        onBufferOverflow = BufferOverflow.DROP_OLDEST,
+    )
     val events: SharedFlow<RouteEvent> = _events.asSharedFlow()
 
     /**
@@ -109,33 +136,6 @@ class LocalModelRouter(
         return PickResult(fallback, reason, active, readinessSummary)
     }
 
-    private suspend fun pickAndEmit(action: String): LocalModelProvider? {
-        val result = pickWithReason()
-        val provider = result.provider
-        if (provider != null) {
-            _events.tryEmit(
-                RouteEvent.Picked(
-                    action = action,
-                    activeIds = result.activeIds,
-                    readiness = result.readiness,
-                    providerId = provider.id,
-                    providerName = provider.displayName,
-                    reason = result.reason,
-                ),
-            )
-        } else {
-            _events.tryEmit(
-                RouteEvent.None(
-                    action = action,
-                    activeIds = result.activeIds,
-                    readiness = result.readiness,
-                    reason = result.reason,
-                ),
-            )
-        }
-        return provider
-    }
-
     /**
      * Whether at least one registered provider can serve a request right now.
      * Used by the UI to decide whether to drive bookmark generation through
@@ -181,20 +181,147 @@ class LocalModelRouter(
     }
 
     suspend fun generateTags(url: String, title: String, content: String): Result<List<String>> {
-        val provider = pickAndEmit("tags")
-            ?: return Result.failure(IllegalStateException("No ready local AI model"))
-        return provider.generateTags(url, title, content)
+        val pick = pickWithReason()
+        val provider = pick.provider
+        if (provider == null) {
+            emitNone("tags", pick)
+            return Result.failure(IllegalStateException("No ready local AI model"))
+        }
+        emitPicked("tags", provider, pick)
+        val t0 = System.nanoTime()
+        val result = runTimed("tags", provider, pick) {
+            provider.generateTags(url, title, content)
+        }
+        val durationMs = (System.nanoTime() - t0) / 1_000_000
+        // DEBUG-only: append a synthetic tag of the form
+        // `<sdk>:<model>:<duration>` (e.g. `leap:lfm2-1.2b-extract:2.34s`)
+        // so a glance at the saved bookmark tells you SDK, model variant,
+        // and how long generation took. Stripped in release builds so
+        // synced Bitwarden entries never carry the marker into production.
+        return if (BuildConfig.DEBUG) {
+            result.map { it + debugProvenanceTag(provider, durationMs) }
+        } else {
+            result
+        }
+    }
+
+    private fun debugProvenanceTag(provider: LocalModelProvider, durationMs: Long): String {
+        val sdk = when (provider.runtime) {
+            ModelRuntime.ML_KIT -> "aicore"
+            ModelRuntime.LLAMA_CPP -> "llama"
+            ModelRuntime.LEAP -> "leap"
+            ModelRuntime.MEDIAPIPE -> "liteRt"
+        }
+        // For LiteRT-LM, append the backend label the SDK actually picked
+        // (NPU/GPU/CPU) so the saved bookmark answers "did acceleration
+        // engage?" without having to grep logcat. The other runtimes don't
+        // expose a comparable concept (AICore is system-managed, llama.cpp
+        // and Leap are CPU-only here), so the suffix only fires for LiteRT.
+        val backendSuffix = (provider as? LiteRtLmModelProvider)
+            ?.currentBackendLabel()
+            ?.let { "[$it]" }
+            .orEmpty()
+        // provider.id is `<runtime-prefix>:<model-id>` (e.g.
+        // `leap:lfm2-1.2b-extract`); strip the prefix so we can substitute
+        // the shorter SDK name without duplicating the runtime label.
+        val model = provider.id.substringAfter(':', missingDelimiterValue = provider.id)
+        // ms below 1s, two-decimal seconds above. Avoids `String.format`
+        // (host-locale-dependent) by doing the math directly.
+        val duration = if (durationMs < 1000) {
+            "${durationMs}ms"
+        } else {
+            val whole = durationMs / 1000
+            val hundredths = (durationMs % 1000) / 10
+            val padded = if (hundredths < 10) "0$hundredths" else "$hundredths"
+            "$whole.${padded}s"
+        }
+        return "$sdk$backendSuffix:$model:$duration"
     }
 
     suspend fun generateDescription(url: String, title: String): Result<String> {
-        val provider = pickAndEmit("description")
-            ?: return Result.failure(IllegalStateException("No ready local AI model"))
-        return provider.generateDescription(url, title)
+        val pick = pickWithReason()
+        val provider = pick.provider
+        if (provider == null) {
+            emitNone("description", pick)
+            return Result.failure(IllegalStateException("No ready local AI model"))
+        }
+        emitPicked("description", provider, pick)
+        return runTimed("description", provider, pick) { provider.generateDescription(url, title) }
     }
 
     suspend fun generateTitle(url: String): Result<String> {
-        val provider = pickAndEmit("title")
-            ?: return Result.failure(IllegalStateException("No ready local AI model"))
-        return provider.generateTitle(url)
+        val pick = pickWithReason()
+        val provider = pick.provider
+        if (provider == null) {
+            emitNone("title", pick)
+            return Result.failure(IllegalStateException("No ready local AI model"))
+        }
+        emitPicked("title", provider, pick)
+        return runTimed("title", provider, pick) { provider.generateTitle(url) }
+    }
+
+    private fun emitPicked(action: String, provider: LocalModelProvider, pick: PickResult) {
+        _events.tryEmit(
+            RouteEvent.Picked(
+                action = action,
+                activeIds = pick.activeIds,
+                readiness = pick.readiness,
+                providerId = provider.id,
+                providerName = provider.displayName,
+                reason = pick.reason,
+            ),
+        )
+    }
+
+    private fun emitNone(action: String, pick: PickResult) {
+        _events.tryEmit(
+            RouteEvent.None(
+                action = action,
+                activeIds = pick.activeIds,
+                readiness = pick.readiness,
+                reason = pick.reason,
+            ),
+        )
+    }
+
+    /**
+     * Times [block] and emits a [RouteEvent.Completed] regardless of how it
+     * exits — normal `Result` (success or failure), or thrown exception
+     * (notably coroutine cancellation, which `runCatching` re-raises). Without
+     * the try/finally, a cancellation would leave the UI strip stuck in
+     * "Running…" forever.
+     *
+     * `inline` is what lets the non-suspending `block` parameter actually call
+     * suspending provider methods — the lambda body is inlined into this
+     * `suspend` function's body, so it runs in a suspending context.
+     *
+     * `nanoTime` is monotonic; `currentTimeMillis` is wall-clock and can jump
+     * backwards on NTP / manual clock changes, producing negative durations.
+     */
+    private suspend inline fun <T> runTimed(
+        action: String,
+        provider: LocalModelProvider,
+        pick: PickResult,
+        block: () -> Result<T>,
+    ): Result<T> {
+        val t0 = System.nanoTime()
+        var success = false
+        try {
+            val result = block()
+            success = result.isSuccess
+            return result
+        } finally {
+            _events.tryEmit(
+                RouteEvent.Completed(
+                    action = action,
+                    activeIds = pick.activeIds,
+                    readiness = pick.readiness,
+                    providerId = provider.id,
+                    providerName = provider.displayName,
+                    durationMs = (System.nanoTime() - t0) / 1_000_000,
+                    success = success,
+                ),
+            )
+        }
     }
 }
diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt
index 0515e2d..bb49a68 100644
--- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt
+++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt
@@ -290,14 +290,16 @@ class ModelDownloadManager(
      * Probe the server for the file's total size without downloading the
      * whole thing. Sends `Range: bytes=0-0` (a 1-byte slice) so the response
      * carries a `Content-Range: bytes 0-0/<total>` header we can parse.
-     * Follows the same manual-redirect chain as openWithRedirects to keep
-     * the Authorization header attached on CDN redirects. Returns -1 if the
+     * Follows the same manual-redirect chain as openWithRedirects, mirroring
+     * its same-host Authorization rule (see notes there). Returns -1 if the
      * server doesn't report a total (e.g. on a non-Range-capable origin).
      */
     private fun discoverTotalBytes(urlString: String, token: String?, maxHops: Int = 5): Long {
+        val originalHost = URL(urlString).host
         var url = URL(urlString)
         var hops = 0
         while (true) {
+            val sameHost = url.host.equals(originalHost, ignoreCase = true)
             val conn = (url.openConnection() as HttpURLConnection).apply {
                 requestMethod = "GET"
                 connectTimeout = 30_000
@@ -305,7 +307,9 @@ class ModelDownloadManager(
                 instanceFollowRedirects = false
                 setRequestProperty("User-Agent", "URLVault/1.0")
                 setRequestProperty("Range", "bytes=0-0")
-                if (!token.isNullOrBlank()) setRequestProperty("Authorization", "Bearer $token")
+                if (sameHost && !token.isNullOrBlank()) {
+                    setRequestProperty("Authorization", "Bearer $token")
+                }
             }
             try {
                 val code = conn.responseCode
@@ -338,9 +342,13 @@ class ModelDownloadManager(
     }
 
     /**
-     * Follow up to 5 redirects manually so we re-apply the Authorization /
-     * Range headers on each hop (HttpURLConnection's automatic redirect
-     * stripping would otherwise drop them).
+     * Follow up to 5 redirects manually so we re-apply the Range header on
+     * each hop (HttpURLConnection would otherwise drop it). The Authorization
+     * header is only attached while we are still on the **original host** —
+     * Hugging Face 302s gated downloads to a pre-signed CDN URL on
+     * `cas-bridge.xethub.hf.co` (and similar), and that CDN rejects extra
+     * `Authorization: Bearer …` headers with HTTP 401. Browsers and curl
+     * drop auth across origins for exactly the same reason.
      */
     private fun openWithRedirects(
         urlString: String,
@@ -348,9 +356,11 @@ class ModelDownloadManager(
         token: String?,
         maxHops: Int = 5,
     ): OpenResult {
+        val originalHost = URL(urlString).host
         var url = URL(urlString)
         var hops = 0
         while (true) {
+            val sameHost = url.host.equals(originalHost, ignoreCase = true)
             val conn = (url.openConnection() as HttpURLConnection).apply {
                 requestMethod = "GET"
                 connectTimeout = 30_000
@@ -358,7 +368,9 @@ class ModelDownloadManager(
                 instanceFollowRedirects = false
                 setRequestProperty("User-Agent", "URLVault/1.0")
                 if (rangeStart > 0) setRequestProperty("Range", "bytes=$rangeStart-")
-                if (!token.isNullOrBlank()) setRequestProperty("Authorization", "Bearer $token")
+                if (sameHost && !token.isNullOrBlank()) {
+                    setRequestProperty("Authorization", "Bearer $token")
+                }
             }
             val code = conn.responseCode
             when (code) {
diff --git a/shared/build.gradle.kts b/shared/build.gradle.kts
index 1a558aa..a2ab916 100644
--- a/shared/build.gradle.kts
+++ b/shared/build.gradle.kts
@@ -37,6 +37,7 @@ kotlin {
             implementation(compose.runtime)
             implementation(compose.foundation)
             implementation(compose.material3)
+            implementation(compose.materialIconsExtended)
             implementation(compose.ui)
             implementation(compose.components.resources)
             implementation(compose.components.uiToolingPreview)
diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt
index 16fe6d2..bec8c1e 100644
--- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt
+++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt
@@ -96,8 +96,15 @@ fun AddEditBookmarkScreen(
 
     val TAG = "AddEditBookmarkScreen"
 
-    // Track which URL we've already triggered AI for, to prevent re-triggering
+    // Track which URL we've already triggered AI for, to prevent re-triggering.
+    // The mode component matters because the share-intent LaunchedEffect
+    // re-keys on `aiCoreEnabled`: when the AI master toggle flips on after
+    // a startup race, we *want* to re-trigger (legacy → AI), but only that
+    // once. Without the mode check, a `force = true` would re-fire even on
+    // unrelated recompositions, producing duplicate description / tags
+    // generations (and two debug provenance tags in the saved bookmark).
     var aiTriggeredForUrl by remember { mutableStateOf<String?>(null) }
+    var aiTriggeredMode by remember { mutableStateOf<String?>(null) }
 
     // Helper to normalize and validate URL for AI triggering
     fun normalizeUrlForAi(rawUrl: String): String? {
@@ -112,12 +119,18 @@ fun AddEditBookmarkScreen(
 
     // Helper to trigger AI/autotag for a given URL
     fun triggerAiForUrl(targetUrl: String, force: Boolean = false) {
-        Logger.d(TAG, "triggerAiForUrl($targetUrl, force=$force)")
-        if (!force && aiTriggeredForUrl == targetUrl) {
-            Logger.d(TAG, "Already triggered for $targetUrl")
+        val desiredMode = if (aiCoreEnabled) "ai" else "legacy"
+        Logger.d(TAG, "triggerAiForUrl($targetUrl, force=$force, mode=$desiredMode)")
+        // Dedup on (URL, mode). Same URL + same mode is a no-op so unrelated
+        // recompositions don't re-fire the AI flow. Same URL + different mode
+        // (legacy → AI when the master toggle flips on after the startup
+        // race) IS a legitimate retrigger and falls through.
+        if (!force && aiTriggeredForUrl == targetUrl && aiTriggeredMode == desiredMode) {
+            Logger.d(TAG, "Already triggered for $targetUrl in $desiredMode mode")
             return
         }
         aiTriggeredForUrl = targetUrl
+        aiTriggeredMode = desiredMode
 
         // If AI is available and enabled, use it for title/desc/tags
         if (aiCoreEnabled) {
@@ -236,6 +249,16 @@ fun AddEditBookmarkScreen(
             is AIGenerationState.Error -> {
                 aiDescriptionError = aiDescriptionState.message
                 onAiDescriptionConsumed()
+                // Description failed — but tags are an independent extraction
+                // and often succeed on the same input (observed: LEAP returned
+                // degenerate punctuation as the description while producing
+                // clean tags for the same URL). Fire tags from URL + title
+                // alone instead of giving up entirely.
+                val currentTarget = normalizeUrlForAi(url)
+                if (aiCoreEnabled && onAiGenerateTags != null && currentTarget != null) {
+                    aiTagError = null
+                    onAiGenerateTags(currentTarget, title, "")
+                }
             }
             else -> {}
         }
@@ -264,8 +287,18 @@ fun AddEditBookmarkScreen(
         }
     }
 
-    // Auto-trigger once for prefilled URLs (share intent).
-    LaunchedEffect(prefilledUrl) {
+    // Auto-trigger for prefilled URLs (share intent). Keyed on
+    // `aiCoreEnabled` as well as `prefilledUrl` so the startup race —
+    // share intent fires before `anyProviderReady`'s async readiness
+    // probe has finished, so `aiCoreEnabled` is briefly false and the
+    // first trigger ends up on the legacy branch — gets corrected once
+    // AI flips on. `triggerAiForUrl`'s mode-aware dedup handles both
+    // cases cleanly: legacy → AI is a real mode change so it re-fires;
+    // a stable-true aiCoreEnabled across recompositions is the same
+    // mode and is deduped. The legacy result-handling LaunchedEffects'
+    // `if (!aiCoreEnabled)` guards already prevent stale legacy results
+    // from clobbering the AI values when this flip happens.
+    LaunchedEffect(prefilledUrl, aiCoreEnabled) {
         if (!isEditMode && prefilledUrl != null) {
             val targetUrl = normalizeUrlForAi(prefilledUrl)
             if (targetUrl != null) {
diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt
new file mode 100644
index 0000000..8dd765b
--- /dev/null
+++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt
@@ -0,0 +1,149 @@
+package com.jaeckel.urlvault.ui
+
+import androidx.compose.animation.AnimatedVisibility
+import androidx.compose.animation.fadeIn
+import androidx.compose.animation.fadeOut
+import androidx.compose.animation.slideInVertically
+import androidx.compose.animation.slideOutVertically
+import androidx.compose.foundation.layout.Arrangement
+import androidx.compose.foundation.layout.Row
+import androidx.compose.foundation.layout.fillMaxWidth
+import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
+import androidx.compose.material3.CircularProgressIndicator
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.Surface
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.graphics.Color
+import androidx.compose.ui.text.font.FontFamily
+import androidx.compose.ui.unit.dp
+
+/**
+ * State for the bottom AI-activity strip. Replaces the debug Toast that
+ * used to surface router decisions on every `generateXxx` call. Auto-hide
+ * is a presentation concern and lives in the caller — [AiActivityStatusLine]
+ * just renders whatever state it is given.
+ */
+sealed class AiActivityState {
+    data object Hidden : AiActivityState() // Nothing to show; the strip is not visible.
+
+    /** A provider was picked; inference is in flight. Shows a spinner. */
+    data class Running(
+        val action: String,
+        val providerName: String,
+    ) : AiActivityState()
+
+    /** Inference finished. Shows the wall-clock duration. */
+    data class Completed(
+        val action: String,
+        val providerName: String,
+        val durationMs: Long, // Milliseconds; rendered as "N ms" or "S.HH s".
+        val success: Boolean, // false appends " (failed)" and switches to error colours.
+    ) : AiActivityState()
+
+    /** Router could not pick a provider — UI surfaces the reason. */
+    data class NoProvider(
+        val action: String,
+        val reason: String,
+    ) : AiActivityState()
+}
+
+/**
+ * Slim auto-hiding strip rendered at the bottom of the app. Designed as the
+ * non-obstructive replacement for the debug Toast spam: a single line that
+ * slides up while AI work is in flight, then briefly shows the timing, then
+ * slides away.
+ *
+ * Add it as the **last child of your screen's Column** (with the screen
+ * content above it given `Modifier.weight(1f)`) so it claims real layout
+ * space when visible and pushes content up. Putting it in an overlaying
+ * `Box` will reintroduce the obscuring behaviour the original Toast had —
+ * the whole point of this strip is that buttons stay reachable while it's
+ * showing.
+ *
+ * Auto-hide of [AiActivityState.Completed] / [AiActivityState.NoProvider] is
+ * the caller's responsibility — use a `LaunchedEffect(state)` with a `delay`
+ * and reset to [AiActivityState.Hidden].
+ * @param state what to render; [AiActivityState.Hidden] animates the strip out.
+ * @param modifier applied to the animated container around the strip.
+ */
+@Composable
+fun AiActivityStatusLine(
+    state: AiActivityState,
+    modifier: Modifier = Modifier,
+) {
+    // AnimatedVisibility keeps composing its content during the exit animation,
+    // when `state` is already Hidden. Keep the last visible state around so the
+    // strip slides away with its text and colours intact instead of blanking.
+    val lastVisible = androidx.compose.runtime.remember { arrayOfNulls<AiActivityState>(1) }
+    if (state !is AiActivityState.Hidden) lastVisible[0] = state
+    val shown = lastVisible[0] ?: state
+    AnimatedVisibility(
+        visible = state !is AiActivityState.Hidden,
+        enter = fadeIn() + slideInVertically(initialOffsetY = { it }),
+        exit = fadeOut() + slideOutVertically(targetOffsetY = { it }),
+        modifier = modifier,
+    ) {
+        val (text, isRunning, isError) = when (shown) {
+            is AiActivityState.Running ->
+                Triple("${shown.action}: ${shown.providerName}…", true, false)
+            is AiActivityState.Completed -> Triple(
+                buildString {
+                    append(shown.action)
+                    append(" via ")
+                    append(shown.providerName)
+                    append(" — ")
+                    append(formatMs(shown.durationMs))
+                    if (!shown.success) append(" (failed)")
+                },
+                false,
+                !shown.success,
+            )
+            is AiActivityState.NoProvider ->
+                Triple("${shown.action}: no model ready (${shown.reason})", false, true)
+            // Only possible before anything was ever shown; kept for exhaustiveness.
+            AiActivityState.Hidden -> Triple("", false, false)
+        }
+
+        Surface(
+            color = if (isError) MaterialTheme.colorScheme.errorContainer
+                    else MaterialTheme.colorScheme.surfaceVariant,
+            contentColor = if (isError) MaterialTheme.colorScheme.onErrorContainer
+                           else MaterialTheme.colorScheme.onSurfaceVariant,
+            tonalElevation = 4.dp,
+            shadowElevation = 4.dp,
+        ) {
+            Row(
+                modifier = Modifier.fillMaxWidth().padding(horizontal = 12.dp, vertical = 6.dp),
+                horizontalArrangement = Arrangement.spacedBy(8.dp),
+                verticalAlignment = Alignment.CenterVertically,
+            ) {
+                if (isRunning) {
+                    CircularProgressIndicator(
+                        modifier = Modifier.size(14.dp),
+                        strokeWidth = 2.dp,
+                        color = MaterialTheme.colorScheme.onSurfaceVariant,
+                    )
+                }
+                Text(
+                    text = text,
+                    style = MaterialTheme.typography.bodySmall,
+                    fontFamily = FontFamily.Monospace,
+                    color = Color.Unspecified,
+                )
+            }
+        }
+    }
+}
+
+private fun formatMs(ms: Long): String {
+    // Anything under one second is reported as-is, in milliseconds.
+    if (ms < 1000) return "$ms ms"
+    // Longer durations become seconds with two truncated decimals. The fraction
+    // is zero-padded by hand because String.format is not in commonMain.
+    val centis = ((ms % 1000) / 10).toString().padStart(2, '0')
+    return "${ms / 1000}.$centis s"
+}
diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt
index 387f199..cf103cd 100644
--- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt
+++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt
@@ -17,6 +17,9 @@ import androidx.compose.foundation.layout.width
 import androidx.compose.foundation.lazy.LazyColumn
 import androidx.compose.foundation.lazy.LazyRow
 import androidx.compose.foundation.lazy.items
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.filled.Settings
+import androidx.compose.material.icons.filled.Sync
 import androidx.compose.material3.Card
 import androidx.compose.material3.CardDefaults
 import androidx.compose.material3.CircularProgressIndicator
@@ -117,17 +120,17 @@ fun BookmarkListScreen(
                                 strokeWidth = 2.dp
                             )
                         } else {
-                            Text(
-                                text = "\uD83D\uDD04",
-                                style = MaterialTheme.typography.titleMedium
+                            Icon(
+                                imageVector = Icons.Default.Sync,
+                                contentDescription = "Sync with Bitwarden",
                             )
                         }
                     }
                     // Settings button
                     IconButton(onClick = onOpenSettings) {
-                        Text(
-                            text = "\u2699\uFE0F",
-                            style = MaterialTheme.typography.titleMedium
+                        Icon(
+                            imageVector = Icons.Default.Settings,
+                            contentDescription = "Settings",
                         )
                     }
                 }
diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt
index 20cac0d..9e41f14 100644
--- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt
+++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt
@@ -35,6 +35,7 @@ import androidx.compose.ui.text.font.FontFamily
 import androidx.compose.ui.text.font.FontWeight
 import androidx.compose.ui.unit.dp
 import com.jaeckel.urlvault.ai.ModelComparisonRunner
+import com.jaeckel.urlvault.ai.ModelRuntime
 import kotlinx.coroutines.launch
 
 @OptIn(ExperimentalMaterial3Api::class)
@@ -212,7 +213,7 @@ private fun ProviderResultCard(result: ModelComparisonRunner.ProviderResult) {
                     fontWeight = FontWeight.SemiBold,
                 )
                 Text(
-                    text = result.runtime.name,
+                    text = runtimeLabel(result.runtime),
                     style = MaterialTheme.typography.labelSmall,
                     color = MaterialTheme.colorScheme.onSurfaceVariant,
                 )
@@ -270,3 +271,16 @@ private fun ResultLine(label: String, value: String, ms: Long) {
         )
     }
 }
+
+/**
+ * UI label for [runtime]. `MEDIAPIPE` is a historical name from when the
+ * LiteRT-LM bundle was loaded via MediaPipe-LLM; show the real runtime instead.
+ */
+private fun runtimeLabel(runtime: ModelRuntime): String {
+    return when (runtime) {
+        ModelRuntime.MEDIAPIPE -> "LiteRT-LM"
+        ModelRuntime.LEAP -> "Leap"
+        ModelRuntime.LLAMA_CPP -> "llama.cpp"
+        ModelRuntime.ML_KIT -> "AICore"
+    }
+}
diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt
index e3c8fb0..05e45a3 100644
--- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt
+++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt
@@ -78,6 +78,9 @@ fun SettingsScreen(
     onDeleteModel: (ModelCatalogEntry) -> Unit = {},
     onToggleModelActive: (ModelCatalogEntry, Boolean) -> Unit = { _, _ -> },
     onAddCustomModel: (hfRepo: String, hfFile: String, displayName: String) -> Unit = { _, _, _ -> },
+    hfToken: String = "",
+    hfTokenFromBuild: Boolean = false,
+    onHfTokenChanged: (String) -> Unit = {},
     onOpenComparison: () -> Unit = {},
     onSaveCredentials: (BitwardenCredentials) -> Unit,
     onNavigateBack: () -> Unit,
@@ -438,6 +441,12 @@ fun SettingsScreen(
                     }
                 }
 
+                HuggingFaceTokenRow(
+                    token = hfToken,
+                    fromBuild = hfTokenFromBuild,
+                    onTokenChanged = onHfTokenChanged,
+                )
+
                 CustomModelEntryRow(onAdd = onAddCustomModel)
 
                 Button(
@@ -690,6 +699,86 @@ private fun ModelCatalogRow(
     }
 }
 
+/**
+ * Lets the user paste a Hugging Face access token so the downloader can
+ * fetch gated repos (most LiteRT-LM Gemma bundles, FunctionGemma, etc.).
+ * Acceptance of each model's licence on huggingface.co is also required —
+ * the token alone doesn't grant access.
+ * @param token the user-saved token; blank when none is stored.
+ * @param fromBuild whether a token was bundled with this build.
+ * @param onTokenChanged receives the trimmed token to save, or "" to clear it.
+ */
+@Composable
+private fun HuggingFaceTokenRow(
+    token: String,
+    fromBuild: Boolean,
+    onTokenChanged: (String) -> Unit,
+) {
+    // When neither a user-saved nor a build-time token exists, default the
+    // row to expanded so the user is nudged to enter one.
+    var expanded by remember(token, fromBuild) { mutableStateOf(token.isBlank() && !fromBuild) }
+    // Reveal four characters per end only if 8+ stay hidden; never echo a short token.
+    val masked = when {
+        token.isBlank() -> ""
+        token.length < 16 -> "••••"
+        else -> token.take(4) + "…" + token.takeLast(4)
+    }
+    fun commit(value: String) { // Hand the value to the caller, then collapse the editor.
+        onTokenChanged(value)
+        expanded = false
+    }
+
+    Column(modifier = Modifier.fillMaxWidth(), verticalArrangement = Arrangement.spacedBy(4.dp)) {
+        Row(
+            modifier = Modifier.fillMaxWidth(),
+            verticalAlignment = Alignment.CenterVertically,
+            horizontalArrangement = Arrangement.SpaceBetween,
+        ) {
+            Column(modifier = Modifier.weight(1f)) {
+                Text("Hugging Face token", style = MaterialTheme.typography.bodyLarge)
+                Text(
+                    text = when {
+                        token.isNotBlank() -> "Saved: $masked"
+                        fromBuild -> "Using token bundled with this build"
+                        else -> "Required for gated models (Gemma, FunctionGemma)"
+                    },
+                    style = MaterialTheme.typography.bodySmall,
+                    color = MaterialTheme.colorScheme.onSurfaceVariant,
+                )
+            }
+            Switch(checked = expanded, onCheckedChange = { expanded = it })
+        }
+        if (expanded) {
+            var draft by remember(token) { mutableStateOf(token) }
+            OutlinedTextField(
+                value = draft,
+                onValueChange = { draft = it },
+                label = { Text("hf_… (read access)") },
+                singleLine = true,
+                modifier = Modifier.fillMaxWidth(),
+            )
+            Text(
+                text = "Create one at huggingface.co/settings/tokens, then accept each gated " +
+                    "model's licence on its page (e.g. google/gemma-3-1b-it).",
+                style = MaterialTheme.typography.bodySmall,
+                color = MaterialTheme.colorScheme.onSurfaceVariant,
+            )
+            Row(horizontalArrangement = Arrangement.spacedBy(8.dp)) {
+                Button(
+                    onClick = { commit(draft.trim()) },
+                    enabled = draft.trim() != token,
+                    modifier = Modifier.weight(1f),
+                ) { Text(if (token.isBlank()) "Save token" else "Update token") }
+                if (token.isNotBlank()) {
+                    Button(onClick = { commit("") }, modifier = Modifier.weight(1f)) {
+                        Text("Clear")
+                    }
+                }
+            }
+        }
+    }
+}
+
 @Composable
 private fun CustomModelEntryRow(
     onAdd: (hfRepo: String, hfFile: String, displayName: String) -> Unit,