diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 62a571b..871207d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,6 +34,12 @@ jobs: run: ./gradlew :shared:check --no-daemon - name: Build Android debug APK + env: + # Optional Hugging Face read-token baked into the APK at build + # time; needed to download gated LiteRT-LM / Gemma model bundles + # without the user pasting one into Settings. Repository secret; + # build still succeeds if absent (token defaults to empty). + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: ./gradlew :androidApp:assembleDebug --no-daemon - name: Resolve APK artifact name diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7c13f0c..9bf031c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,6 +49,11 @@ jobs: ANDROID_STORE_PASSWORD: ${{ secrets.ANDROID_STORE_PASSWORD }} ANDROID_KEY_ALIAS: ${{ secrets.ANDROID_KEY_ALIAS }} ANDROID_KEY_PASSWORD: ${{ secrets.ANDROID_KEY_PASSWORD }} + # Optional Hugging Face read-token baked into the APK so gated + # LiteRT-LM / Gemma bundles can be downloaded without the user + # pasting a token. Empty / absent secret leaves the default empty + # and the user is prompted in Settings. + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | ./gradlew :androidApp:assembleRelease --no-daemon \ -PappVersion=${{ steps.version.outputs.version }} \ diff --git a/README.md b/README.md index 2b18181..0bbdb93 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,46 @@ Per-platform repository implementations: ./gradlew :shared:build # Build the KMP library ``` +### Hugging Face Token (gated models) + +Most LiteRT-LM models in the catalog (Gemma 3, Gemma 4, FunctionGemma) are *gated* on Hugging Face — the API will reject downloads with HTTP 401 until two things are true: + +1. You hold a Hugging Face access token with **read** scope. Create one at <https://huggingface.co/settings/tokens>. +2. You've accepted each model's licence on its HF page (e.g. <https://huggingface.co/litert-community/Gemma3-1B-IT>). 
Acceptance is per-repo and is a one-time click on the web UI. + +URLVault accepts the token from three sources, in this order of precedence: + +1. **User-entered** — Settings → Local AI Models → "Hugging Face token". Stored in `EncryptedSharedPreferences` on the device. Best for personal builds. +2. **Build-time `HF_TOKEN` env var** — read by `androidApp/build.gradle.kts` and exposed as `BuildConfig.HF_TOKEN_DEFAULT`. Used by CI. +3. **Build-time `hfToken` in `local.properties`** — same destination, fallback when the env var is absent. Used by local developer builds. + +The Settings row reads "Using token bundled with this build" when sources 2 or 3 are present and the user hasn't entered one of their own. + +#### Local developer builds + +Add a single line to `local.properties` at the repo root (already in `.gitignore` — the token never leaves your machine): + +```properties +hfToken=hf_xxxxxxxxxxxxxxxxxxxx +``` + +After that, `./gradlew :androidApp:assembleDebug` and `./gradlew :androidApp:installDebug` will pick the token up automatically. Or set the env var per-invocation: + +```bash +HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx ./gradlew :androidApp:assembleDebug +``` + +#### CI builds + +Add `HF_TOKEN` as a repository secret on GitHub: + +- *Settings → Secrets and variables → Actions → New repository secret*, name `HF_TOKEN`. +- The existing `build.yml` and `release.yml` workflows already read it. With the secret absent, builds still succeed and the APK ships with the field empty (the user is prompted in Settings). + +> **Security note.** Anything baked into the APK can be recovered by reverse-engineering. Only ship a *read-only* token that is acceptable for the people who will install the build. The user-entered path stores the token in EncryptedSharedPreferences (Android Keystore-wrapped) and is the safer default for shared / public builds. 
+ +The downloader scrubs the `Authorization` header on cross-origin redirects (HF 302s gated downloads to a pre-signed CDN URL on `cas-bridge.xethub.hf.co`, which would otherwise reject the extra header with 401), so the token only travels to `huggingface.co` itself. + ### iOS 1. Open `iosApp/iosApp.xcodeproj` in Xcode diff --git a/androidApp/build.gradle.kts b/androidApp/build.gradle.kts index 5398330..c669085 100644 --- a/androidApp/build.gradle.kts +++ b/androidApp/build.gradle.kts @@ -1,6 +1,7 @@ import org.jetbrains.kotlin.gradle.dsl.JvmTarget import java.nio.file.Files import java.util.Base64 +import java.util.Properties plugins { alias(libs.plugins.android.application) @@ -42,6 +43,31 @@ android { testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + // Optional Hugging Face read-token, baked into the APK at build time + // so the downloader can fetch gated LiteRT-LM bundles without the + // user pasting a token. Two sources, in order of precedence: + // 1. HF_TOKEN env var (ignored when blank) — used by CI (GitHub Actions secret). + // 2. `hfToken` property in <repo-root>/local.properties — used for + // local developer builds. local.properties is gitignored so + // the token never leaves the developer's machine. + // Empty default lets the build succeed without either; the user can + // paste a token into the Settings screen instead. + // Whitespace and any non-token characters are stripped to keep the + // generated string literal safe — real HF tokens are alphanumeric + // with `_` / `-`. Note: anything baked into the APK is recoverable + // via reverse engineering — only ship a *read-only* HF token here. + val hfTokenFromLocalProps: String? 
= rootProject.file("local.properties") + .takeIf { it.exists() } + ?.let { f -> + val props = Properties() + f.inputStream().use { stream -> props.load(stream) } + props.getProperty("hfToken") + } + val hfTokenDefault = (System.getenv("HF_TOKEN")?.takeIf { it.isNotBlank() } ?: hfTokenFromLocalProps ?: "") + .trim() + .filter { it.isLetterOrDigit() || it == '_' || it == '-' } + buildConfigField("String", "HF_TOKEN_DEFAULT", "\"$hfTokenDefault\"") + // Llamatik ships native libs for arm64-v8a, armeabi-v7a, x86, x86_64. // libllama_jni.so alone is ~23 MB per ABI; restricting to arm64-v8a cuts // ~90 MB of unused code from the APK. Every supported Android device diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt index 6c02822..09c9607 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/MainActivity.kt @@ -2,11 +2,14 @@ package com.jaeckel.urlvault.android import android.content.Intent import android.os.Bundle -import android.widget.Toast import androidx.activity.ComponentActivity +import androidx.activity.compose.BackHandler import androidx.activity.compose.setContent import androidx.activity.enableEdgeToEdge +import androidx.compose.foundation.layout.Box import androidx.compose.foundation.layout.Column +import androidx.compose.foundation.layout.fillMaxSize +import androidx.compose.foundation.layout.navigationBarsPadding import androidx.compose.foundation.layout.statusBarsPadding import androidx.compose.ui.Modifier import androidx.compose.runtime.LaunchedEffect @@ -16,6 +19,7 @@ import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.produceState import androidx.compose.runtime.remember import androidx.compose.runtime.setValue +import kotlinx.coroutines.delay import com.jaeckel.urlvault.ai.AiProviderIds import com.jaeckel.urlvault.ai.ModelCatalog import 
com.jaeckel.urlvault.ai.ModelCatalogEntry @@ -30,6 +34,8 @@ import com.jaeckel.urlvault.android.sync.AndroidBitwardenPreferences import com.jaeckel.urlvault.model.Bookmark import com.jaeckel.urlvault.sync.BitwardenSyncService import com.jaeckel.urlvault.ui.AddEditBookmarkScreen +import com.jaeckel.urlvault.ui.AiActivityState +import com.jaeckel.urlvault.ui.AiActivityStatusLine import com.jaeckel.urlvault.ui.BookmarkListScreen import com.jaeckel.urlvault.ui.ModelComparisonScreen import com.jaeckel.urlvault.ui.ModelStatusBanner @@ -85,6 +91,11 @@ class MainActivity : ComponentActivity() { val warmingIds by localModelRouter.warmingIds.collectAsState() var customEntries by remember { mutableStateOf(localModelPrefs.loadCustomEntries()) } var activeIds by remember { mutableStateOf(localModelPrefs.loadActiveIds()) } + // The user-only token (the build-time fallback isn't shown as + // a saved value — the row says "Using token bundled with this + // build" instead). + var hfToken by remember { mutableStateOf(localModelPrefs.loadUserHfToken().orEmpty()) } + val hfTokenFromBuild = remember { localModelPrefs.hasBuildTimeHfToken() } // Settings reads two heavy values from EncryptedSharedPreferences: // the Bitwarden credentials (decrypts via Keystore) and the // field-history blob. Cache them in remembered state and only @@ -99,28 +110,47 @@ class MainActivity : ComponentActivity() { aiCoreService.initialize() } - // DEBUG-only: surface which provider actually served each AI call - // so we can confirm an "activated" model is what's being used vs. - // silently falling back to AICore. + // DEBUG-only: surface which provider actually served each AI + // call (and how long it took) in a thin auto-hiding strip at + // the bottom of the screen. Replaces a much louder Toast that + // obscured the form while the user was trying to interact + // with it. 
+ var aiActivity by remember { mutableStateOf(AiActivityState.Hidden) } if (BuildConfig.DEBUG) { LaunchedEffect(Unit) { localModelRouter.events.collect { event -> - val readinessLine = event.readiness.joinToString { (id, r) -> - "${id.substringAfter(':')}=${if (r) "✓" else "✗"}" - } - val activeLine = if (event.activeIds.isEmpty()) "active=none" - else "active=${event.activeIds.joinToString { it.substringAfter(':') }}" - val head = when (event) { + aiActivity = when (event) { is LocalModelRouter.RouteEvent.Picked -> - "AI ${event.action}: ${event.providerName}\n${event.reason}" + AiActivityState.Running(event.action, event.providerName) + is LocalModelRouter.RouteEvent.Completed -> + AiActivityState.Completed( + action = event.action, + providerName = event.providerName, + durationMs = event.durationMs, + success = event.success, + ) is LocalModelRouter.RouteEvent.None -> - "AI ${event.action}: NO PROVIDER\n${event.reason}" + AiActivityState.NoProvider(event.action, event.reason) } - val text = "$head\n$activeLine\n$readinessLine" - Toast.makeText(this@MainActivity, text, Toast.LENGTH_LONG).show() } } } + // Auto-hide once the user has had time to read the result. + // Running stays visible for as long as the LLM is working + // (we only transition out of it when Completed/None arrive). + LaunchedEffect(aiActivity) { + when (aiActivity) { + is AiActivityState.Completed -> { + delay(3_500) + aiActivity = AiActivityState.Hidden + } + is AiActivityState.NoProvider -> { + delay(5_000) + aiActivity = AiActivityState.Hidden + } + else -> {} + } + } // Show toggle for any status except Unknown (still probing) val aiCoreAvailable = aiCoreStatus !is AICoreStatus.Unknown && aiCoreStatus !is AICoreStatus.Unavailable @@ -152,11 +182,34 @@ class MainActivity : ComponentActivity() { } } + // Without an explicit BackHandler, the system back gesture + // bypasses our in-memory `currentScreen` state and finishes + // the Activity — i.e. 
tapping back from Settings exits the + // app instead of returning to the bookmark list. Mirror the + // in-screen back arrows: Comparison → Settings; Settings and + // AddEdit → List. List is the root, so the handler is + // disabled there and the OS default (finish) applies. + BackHandler(enabled = currentScreen !is Screen.List) { + currentScreen = when (currentScreen) { + is Screen.Comparison -> Screen.Settings + is Screen.Settings, is Screen.AddEdit -> Screen.List + is Screen.List -> Screen.List // unreachable + } + } + Column( - // enableEdgeToEdge() lets content draw under the status - // bar; without statusBarsPadding the banner would land - // behind the system clock / battery icons. - modifier = Modifier.statusBarsPadding(), + // enableEdgeToEdge() lets content draw under the system + // bars; the two *barsPadding modifiers reserve space at + // top and bottom AND consume the corresponding insets so + // descendants (notably the screens' Material Scaffolds + // with BottomAppBar) don't double-pad. Without this, the + // BottomAppBar kept its own gesture-pill padding even + // when the AI activity strip slid in below it, making + // the button row's box visibly grow. + modifier = Modifier + .fillMaxSize() + .statusBarsPadding() + .navigationBarsPadding(), ) { // Persistent status banner — surfaces the active model // warming up or any in-flight download regardless of which @@ -169,6 +222,12 @@ class MainActivity : ComponentActivity() { catalog = ModelCatalog.builtIn + customEntries, aiCoreId = AiProviderIds.AICORE, ) + // Wrap the active screen in a weighted Box so the AI + // activity strip below can claim its natural height + // without overlapping the screen's own bottom buttons — + // when the strip is visible the screen's available + // height shrinks and its Save / Cancel row reflows up. 
+ Box(modifier = Modifier.weight(1f).fillMaxSize()) { when (val screen = currentScreen) { is Screen.List -> BookmarkListScreen( viewModel = bookmarkViewModel, @@ -268,6 +327,12 @@ class MainActivity : ComponentActivity() { // generate() call doesn't pay model-load cost. if (active) appScope.launch { localModelRouter.warmUpActive() } }, + hfToken = hfToken, + hfTokenFromBuild = hfTokenFromBuild, + onHfTokenChanged = { newToken -> + hfToken = newToken + localModelPrefs.saveHfToken(newToken) + }, onAddCustomModel = { hfRepo, hfFile, displayName -> val newEntry = ModelCatalogEntry( id = "custom:" + hfRepo.lowercase().replace('/', '_') + ":" + hfFile.lowercase(), @@ -302,7 +367,19 @@ class MainActivity : ComponentActivity() { ) } } - } // close Column wrapping the banner + screen content + } // close weighted Box wrapping the screen + + // DEBUG-only AI activity strip. Last child of the Column + // so when AnimatedVisibility expands it from 0-height + // the screen above is pushed up — its Save button stays + // visible. The outer Column already consumed the nav + // bar inset, so the strip needs no padding of its own. + if (BuildConfig.DEBUG) { + AiActivityStatusLine( + state = aiActivity, + ) + } + } // close outer Column } } } diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt index 8a974f5..0b4dc6f 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/AICoreService.kt @@ -288,13 +288,26 @@ class AICoreService(httpClient: HttpClient) { /** * Generate a 1-2 sentence description for a bookmark. - * Fetches the web page to provide context for an accurate description. 
+ * + * Same shape as [generateTitle]: if the page itself carries a + * non-blank publisher-written summary (`<meta property="og:description">` or + * `<meta name="description">`), return it verbatim — the LLM can't beat + * what the author wrote about their own page, and burning a Gemini Nano + * call to "rewrite" an existing 1-2 sentence summary is wasted work + * that often degrades the result. The LLM only fires for pages with no + * metadata-provided description, where genuine extraction from + * `visibleText` is needed. */ suspend fun generateDescription(url: String, title: String): Result<String> { return runCatching { val pageContent = fetchPageContent(url) - val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH) ?: "" + val nativeDesc = pageContent?.let { listOfNotNull(it.ogDescription, it.metaDescription).firstOrNull { d -> d.isNotBlank() } } + if (!nativeDesc.isNullOrBlank()) { + return@runCatching validateDescription(nativeDesc.trim()) + } + + val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH) val prompt = buildString { appendLine("Write a 1-2 sentence factual description for this bookmark.") appendLine("Return ONLY the description, nothing else.") @@ -305,15 +318,12 @@ class AICoreService(httpClient: HttpClient) { appendLine("Title: $title") } if (pageSummary.isNotBlank()) { - appendLine("Page summary: $pageSummary") + appendLine("Page text: $pageSummary") } else { appendLine("If you cannot determine what the page is about, respond with: Unable to generate description.") } } - - // See generateTags() — inline runBenchmarking removed for the - // same reason; explicit comparison lives in - // ModelComparisonScreen. 
+ validateDescription(runInference(prompt).trim()) } } diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt index 1627861..bda1bd9 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LeapModelProvider.kt @@ -114,7 +114,24 @@ class LeapModelProvider( override suspend fun generateDescription(url: String, title: String): Result<String> = runCatching { val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull() - val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty() + + // Short-circuit on a non-blank page-provided description — same shape as + // generateTitle. Two reasons this matters specifically for LFM2- + // Extract: + // - it's an *extraction* fine-tune, not a generation one. Asking + // it to rewrite an already-good summary just wastes a model + // call; + // - on pages where the supplied text has nothing extractable, the + // grammar's `minLength: 1` cornering produces degenerate + // sequences like `:","",..."` (see `looksDegenerate`). Skipping + // the LLM entirely when a usable description is already + // available eliminates that failure mode for those pages. + val nativeDesc = pageContent?.let { listOfNotNull(it.ogDescription, it.metaDescription).firstOrNull { d -> d.isNotBlank() } } + if (!nativeDesc.isNullOrBlank()) { + return@runCatching validateDescription(nativeDesc.trim()) + } + + val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH) val schema = """ { @@ -135,9 +152,21 @@ class LeapModelProvider( // this as "extract a summary from the supplied text" rather than // "write a description"; otherwise the model has nothing to extract, // the grammar still forces a non-empty string, and we get garbage - // (the original prompt produced a single-comma description). 
+ // (the original prompt produced a single-comma description; a later + // observed regression produced `{"description":":\",\",..."}` — + // valid JSON shape, garbage value, when supplied text was thin). + // Defences against that mode: + // - state explicitly that real natural-language sentences are + // required and that punctuation-only output is wrong; + // - give the model a concrete fallback to emit when there's + // nothing to extract, so it doesn't have to invent garbage to + // satisfy the grammar. + // The provider also rejects degenerate output post-hoc — see + // `looksDegenerate`. val task = buildString { - appendLine("Extract a 1-2 sentence summary describing what the web page below is about. Use only information present in the supplied text.") + appendLine("Extract a 1-2 sentence summary describing what the web page below is about, using only information present in the supplied text.") + appendLine("The summary must be real English (or German) sentences with normal words and spaces — never punctuation-only output.") + appendLine("If the supplied text does not contain enough information to summarise, return exactly: No summary available.") appendLine() appendLine("URL: $url") if (title.isNotBlank()) appendLine("Title: $title") @@ -145,9 +174,9 @@ class LeapModelProvider( appendLine("Page content:") appendLine(pageSummary) } else { - // No page content fetched — give the model something concrete - // to extract from rather than asking it to invent prose. - appendLine("Page content: (unavailable — derive a one-sentence summary from the URL and title only)") + // No page content fetched — explicitly authorise the + // canonical fallback rather than asking for invented prose. 
+ appendLine("Page content: (unavailable — return: No summary available.)") } appendLine() appendLine("Return the extracted summary as the \"description\" field.") @@ -159,7 +188,12 @@ class LeapModelProvider( } Log.i(TAG, "[$id] description raw: $raw") - validateDescription(parseJson(raw).description.trim()) + val text = parseJson(raw).description.trim() + if (looksDegenerate(text)) { + Log.w(TAG, "[$id] description rejected as degenerate: ${text.take(80)}") + error("Model produced degenerate output (no extractable content)") + } + validateDescription(text) } override suspend fun generateTitle(url: String): Result = runCatching { @@ -203,7 +237,33 @@ class LeapModelProvider( } Log.i(TAG, "[$id] title raw: $raw") - parseJson(raw).title.trim().removeSurrounding("\"") + val text = parseJson(raw).title.trim().removeSurrounding("\"") + if (looksDegenerate(text)) { + Log.w(TAG, "[$id] title rejected as degenerate: ${text.take(80)}") + error("Model produced degenerate output (no extractable content)") + } + text + } + + /** + * Heuristic to catch the LFM2-Extract failure mode where the grammar- + * constrained sampler forces a non-empty string but the supplied text + * has nothing to extract — the model fills the budget with degenerate + * sequences like `:","","",...`. JSON shape is valid; value is garbage. + * + * Real natural-language output is mostly letters with reasonable + * character diversity. Reject anything that fails both bars so the UI + * surfaces "AI generation failed" instead of persisting garbage. 
+ */ + private fun looksDegenerate(text: String): Boolean { + val trimmed = text.trim() + if (trimmed.length < 5) return true + val letterCount = trimmed.count { it.isLetter() } + val letterRatio = letterCount.toDouble() / trimmed.length + if (letterRatio < 0.4) return true + val distinctChars = trimmed.toSet().size + if (distinctChars < 5) return true + return false } /** diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt index 77a1175..f5fd9c0 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmModelProvider.kt @@ -40,6 +40,15 @@ class LiteRtLmModelProvider( override suspend fun isReady(): Boolean = bridge.isAvailable() + /** + * Backend the SDK ended up loading on (`"NPU"` / `"GPU"` / `"CPU"`), + * or null if no model is loaded yet. Read by `LocalModelRouter` to + * enrich the debug provenance tag — the saved bookmark then carries + * `liteRt[GPU]:gemma-3-1b-it-int4:2.34s` so it's obvious at a glance + * whether NPU/GPU acceleration was actually in play. + */ + fun currentBackendLabel(): String? = bridge.currentBackendLabel() + override suspend fun preload() { // Same mutex as the generate path so an inference call can't race a // warm-up into the LiteRT-LM Engine constructor. @@ -115,7 +124,20 @@ class LiteRtLmModelProvider( override suspend fun generateDescription(url: String, title: String): Result = runCatching { val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull() - val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty() + + // Short-circuit on a page-provided description — same shape as + // generateTitle. 
Most pages carry a publisher-written + // og:description / `<meta name="description">` already optimised for + // social-card / SERP display; the LLM rewrite is wasted work and + // on Tensor CPU here it costs ~1–5 seconds per call. Skip + // straight to it. The model fires only when the page has no + // non-blank metadata-provided description. + val nativeDesc = pageContent?.let { listOfNotNull(it.ogDescription, it.metaDescription).firstOrNull { d -> d.isNotBlank() } } + if (!nativeDesc.isNullOrBlank()) { + return@runCatching validateDescription(nativeDesc.trim()) + } + + val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH) val example = """{"description": "A Kotlin Multiplatform tutorial covering shared UI with Compose."}""" diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt index 8ee427e..2e4e8eb 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmNativeBridge.kt @@ -18,6 +18,14 @@ interface LiteRtLmNativeBridge { /** Whether LiteRT-LM loaded successfully and the device can run inference. */ fun isAvailable(): Boolean + /** + * Label for the currently loaded backend (`"NPU"` / `"GPU"` / `"CPU"`), + * or null if no model is loaded. Surfaced in the debug provenance tag + * so the saved bookmark answers "did it run on NPU/GPU/CPU?" at a + * glance, without having to dig through logcat. + */ + fun currentBackendLabel(): String? = null + /** * Loads the `.litertlm` bundle at [absolutePath] into memory. 
Idempotent * per path: a repeated call with the same path is a no-op; a different diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt index 9a04063..9235376 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LiteRtLmSdkBridge.kt @@ -30,9 +30,23 @@ fun interface LiteRtLmBackendStrategy { /** * NPU first when the device's `nativeLibraryDir` is non-blank (vendor libs - * are loaded from there for QCS / Pixel chips), then GPU, then CPU. On - * unsupported devices the NPU init throws and `load()` falls through to - * the next backend. + * are loaded from there for QCS / Pixel chips), then **CPU**, then GPU. + * + * GPU is intentionally last. On every Pixel Tensor we've tested (G2 on + * Pixel 7a, G5 on Pixel 10 Pro Fold) the GPU engine loads but the first + * generate call throws `Can not find OpenCL library on this device` — + * LiteRT-LM 0.10.x auto-selects an OpenCL Top-K sampler from the engine + * backend and Tensor doesn't ship OpenCL drivers. The SDK has no public + * knob to use the CPU sampler with a GPU engine, so on Tensor the only + * way to get a working sampler is to run the engine on CPU too. Putting + * CPU before GPU avoids a wasted ~5–10 s GPU load + failed generate + * cycle on every cold start on those devices. + * + * Cost: on a hypothetical device with working OpenCL drivers we'd miss + * the GPU speedup. We don't currently have such a test device and the + * "correct on Tensor" trade is much more important. The runtime + * self-heal in [LiteRtLmSdkBridge.runCollect] still catches the OpenCL + * error if a custom strategy puts GPU first. 
*/ object DefaultBackendStrategy : LiteRtLmBackendStrategy { override fun candidates(nativeLibDir: String): List<Pair<String, Backend>> { @@ -40,9 +54,9 @@ object DefaultBackendStrategy : LiteRtLmBackendStrategy { if (nativeLibDir.isNotBlank()) { list.add("NPU" to Backend.NPU(nativeLibDir)) } - list.add("GPU" to Backend.GPU()) // null = default thread count picked by the runtime. list.add("CPU" to Backend.CPU(null)) + list.add("GPU" to Backend.GPU()) return list } } @@ -70,6 +84,16 @@ class LiteRtLmSdkBridge( private var currentPath: String? = null private var currentBackend: String? = null + /** + * Backends that *initialised successfully* but then failed at runtime + * during `generateContent` (e.g. Pixel 7a's Tensor G2 GPU loads fine but + * the Top-K sampler tries to dlopen OpenCL and the Tensor stack has none, + * so `runCollect` throws `Can not find OpenCL library on this device`). + * Filtered out of subsequent loads in this process so the bridge doesn't + * keep redoing the same dance every call. Cleared on app process death. + */ + private val runtimeBlockedBackends = mutableSetOf<String>() + private val classLoaderProbe: Boolean by lazy { try { Class.forName("com.google.ai.edge.litertlm.Engine") @@ -83,65 +107,93 @@ class LiteRtLmSdkBridge( override fun isAvailable(): Boolean = classLoaderProbe + override fun currentBackendLabel(): String? 
= currentBackend + override suspend fun load(absolutePath: String) { mutex.withLock { if (currentPath == absolutePath && engine != null) { Log.v(TAG, "load: already loaded $absolutePath, no-op") return } - withContext(Dispatchers.IO) { - engine?.let { - Log.i(TAG, "load: switching model — closing previous $currentPath") - runCatching { it.close() } - } - engine = null - currentPath = null - currentBackend = null + loadInternalLocked(absolutePath) + } + } - val cacheDir = File(context.cacheDir, "litertlm").also { it.mkdirs() } - val nativeLibDir = context.applicationInfo.nativeLibraryDir.orEmpty() - val backendsToTry = backendStrategy.candidates(nativeLibDir) - - var lastError: Throwable? = null - for ((label, backend) in backendsToTry) { - val t0 = System.currentTimeMillis() - Log.i(TAG, "load: trying backend=$label for $absolutePath") - val candidate = Engine( - EngineConfig( - modelPath = absolutePath, - backend = backend, - visionBackend = backend, - audioBackend = backend, - maxNumTokens = null, - maxNumImages = null, - cacheDir = cacheDir.absolutePath, - ), - ) - val initOk = runCatching { candidate.initialize() } - if (initOk.isSuccess) { - engine = candidate - currentPath = absolutePath - currentBackend = label - Log.i( - TAG, - "load: ready on $label in ${System.currentTimeMillis() - t0}ms — $absolutePath", - ) - return@withContext - } else { - lastError = initOk.exceptionOrNull() - Log.w( - TAG, - "load: backend=$label failed (${lastError?.message}); trying next", - ) - runCatching { candidate.close() } - } - } - val tried = backendsToTry.joinToString(" → ") { it.first } + /** + * Same logic as [load] but assumes the caller already holds [mutex]. + * Exists so [runCollect] can reload the engine on the next backend after + * an OpenCL-style runtime failure without dropping and re-acquiring the + * mutex (which would let another caller race in mid-recovery). 
+ */ + private suspend fun loadInternalLocked(absolutePath: String) { + withContext(Dispatchers.IO) { + engine?.let { + Log.i(TAG, "load: switching model — closing previous $currentPath") + runCatching { it.close() } + } + engine = null + currentPath = null + currentBackend = null + + val cacheDir = File(context.cacheDir, "litertlm").also { it.mkdirs() } + val nativeLibDir = context.applicationInfo.nativeLibraryDir.orEmpty() + val backendsToTry = backendStrategy.candidates(nativeLibDir) + .filterNot { (label, _) -> label in runtimeBlockedBackends } + + if (backendsToTry.isEmpty()) { throw IllegalStateException( - "LiteRT-LM failed on every backend ($tried). Last error: ${lastError?.message}", - lastError, + "LiteRT-LM has no usable backends left for this session " + + "(all blocked by prior runtime failures: $runtimeBlockedBackends)", ) } + + var lastError: Throwable? = null + for ((label, backend) in backendsToTry) { + val t0 = System.currentTimeMillis() + Log.i(TAG, "load: trying backend=$label for $absolutePath") + // visionBackend / audioBackend left null: every entry in + // ModelCatalog is text-only. Setting them to `backend` + // tells the engine to enable those modalities, and + // initialize() then fails with `NOT_FOUND: + // TF_LITE_VISION_ENCODER not found in the model.` for + // text-only bundles (FunctionGemma 270M, Gemma 3 270M, + // Qwen3 0.6B, etc.). When a true multi-modal Gemma 4 E2B + // bundle is added later, switch this on per-entry. 
+ val candidate = Engine( + EngineConfig( + modelPath = absolutePath, + backend = backend, + visionBackend = null, + audioBackend = null, + maxNumTokens = null, + maxNumImages = null, + cacheDir = cacheDir.absolutePath, + ), + ) + val initOk = runCatching { candidate.initialize() } + if (initOk.isSuccess) { + engine = candidate + currentPath = absolutePath + currentBackend = label + Log.i( + TAG, + "load: ready on $label in ${System.currentTimeMillis() - t0}ms — $absolutePath", + ) + return@withContext + } else { + lastError = initOk.exceptionOrNull() + Log.w( + TAG, + "load: backend=$label failed (${lastError?.message}); trying next", + ) + runCatching { candidate.close() } + } + } + val tried = backendsToTry.joinToString(" → ") { it.first } + throw IllegalStateException( + "LiteRT-LM failed on every backend ($tried). Last error: ${lastError?.message}", + lastError, + ) } } @@ -168,6 +220,43 @@ class LiteRtLmSdkBridge( } private suspend fun runCollect(text: String, maxTokens: Int): String { + return try { + runCollectOnce(text, maxTokens) + } catch (t: Throwable) { + // Pixel 7a / Tensor G2: the GPU backend initialises fine but + // generation throws `Can not find OpenCL library on this device` + // because LiteRT-LM's Top-K sampler dlopens OpenCL even on the + // WebGPU path. Blocklist that backend so the *next* call reloads + // on the remaining strategy candidates (typically CPU). + // + // We deliberately do NOT reload + retry inline here. `Engine.close()` + // doesn't release the GPU pipeline's native memory synchronously + // — observed on Pixel 7a, the in-flight reload of the CPU engine + // briefly held both pipelines in RAM and the process peaked at + // ~5.96 GB, well past Pixel 7a's effective per-app budget. The + // LMK reaped the app and the user saw an unexplained "LiteRT + // crashed the app" with no FATAL exception in logcat. 
Bailing + // out here keeps peak memory at 1× model and lets the very next + // entry-point call (provider.generateXxx → bridge.load) start + // from a clean slate with the blocklist already applied. + val brokenBackend = currentBackend + if (brokenBackend != null && isRecoverableRuntimeError(t)) { + Log.w( + TAG, + "Recovering from $brokenBackend runtime failure (${t.message?.take(120)}) — " + + "blocklisting; next request will reload on remaining backends.", + ) + runtimeBlockedBackends += brokenBackend + runCatching { engine?.close() } + engine = null + currentPath = null + currentBackend = null + } + throw t + } + } + + private suspend fun runCollectOnce(text: String, maxTokens: Int): String { val current = engine ?: error("LiteRT-LM: no model loaded") // maxNumTokens here is advisory — the SDK still respects the config- // level cap. We pass through whatever sampling the user requests. @@ -196,6 +285,16 @@ class LiteRtLmSdkBridge( } } + /** + * Recoverable = the engine loaded but a runtime feature it tried to use + * isn't on this device. Right now the only known case is OpenCL missing + * on Pixel Tensor; widen as we hit more. 
+ */ + private fun isRecoverableRuntimeError(t: Throwable): Boolean { + val msg = (t.message ?: "").lowercase() + return "opencl" in msg || "open cl" in msg + } + override suspend fun unload() { mutex.withLock { withContext(Dispatchers.IO) { diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt index 7d1d4af..32fbd62 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LlamaCppModelProvider.kt @@ -77,8 +77,19 @@ class LlamaCppModelProvider( override suspend fun generateDescription(url: String, title: String): Result<String> = runCatching { val pageContent = runCatching { contentExtractor.extract(url) }.getOrNull() - val pageSummary = pageContent?.bestSummary(MAX_PAGE_CONTENT_LENGTH).orEmpty() + // Same short-circuit as generateTitle: prefer the publisher's own + // description (og:description / <meta name="description">) over a + // model rewrite. The GGUF model is most useful when the page has + // *no* metadata-provided summary; otherwise we just spend several + // seconds rewriting a 1-2 sentence string into a slightly worse + // 1-2 sentence string.
+ val nativeDesc = pageContent?.let { it.ogDescription ?: it.metaDescription } + if (!nativeDesc.isNullOrBlank()) { + return@runCatching validateDescription(nativeDesc.trim()) + } + + val pageSummary = pageContent?.visibleText.orEmpty().take(MAX_PAGE_CONTENT_LENGTH) val prompt = buildString { appendLine("Write a 1-2 sentence factual description for this bookmark.") appendLine("Return ONLY the description, nothing else.") @@ -87,7 +98,7 @@ class LlamaCppModelProvider( appendLine("URL: $url") if (title.isNotBlank()) appendLine("Title: $title") if (pageSummary.isNotBlank()) { - appendLine("Page summary: $pageSummary") + appendLine("Page text: $pageSummary") } else { appendLine("If you cannot determine what the page is about, respond with: Unable to generate description.") } diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt index 303da5e..6627ecf 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelPreferences.kt @@ -5,6 +5,7 @@ import android.content.SharedPreferences import androidx.security.crypto.EncryptedSharedPreferences import androidx.security.crypto.MasterKey import com.jaeckel.urlvault.ai.ModelCatalogEntry +import com.jaeckel.urlvault.android.BuildConfig import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json @@ -51,7 +52,24 @@ class LocalModelPreferences(private val context: Context) { prefs.edit().putStringSet(KEY_ACTIVE_IDS, ids.toSet()).apply() } - fun loadHfToken(): String? = prefs.getString(KEY_HF_TOKEN, null) + /** + * User-saved token wins; if blank, fall back to [BuildConfig.HF_TOKEN_DEFAULT] + * so a CI build that injected `HF_TOKEN` can ship gated-model access without + * any user action. Returns null when neither source has a token. + */ + fun loadHfToken(): String? 
{ + val saved = prefs.getString(KEY_HF_TOKEN, null)?.takeIf { it.isNotBlank() } + if (saved != null) return saved + return BuildConfig.HF_TOKEN_DEFAULT.takeIf { it.isNotBlank() } + } + + /** + * The literal user-entered value (without the build-time fallback) so the + * Settings UI can show "(none)" vs. "saved: hf_…" honestly. Use + * [loadHfToken] for the value the downloader should actually send. + */ + fun loadUserHfToken(): String? = + prefs.getString(KEY_HF_TOKEN, null)?.takeIf { it.isNotBlank() } fun saveHfToken(token: String?) { prefs.edit().apply { @@ -59,6 +77,9 @@ class LocalModelPreferences(private val context: Context) { }.apply() } + /** True iff the APK was built with a non-empty build-time token (`HF_TOKEN` env var or `hfToken` in `local.properties`). */ + fun hasBuildTimeHfToken(): Boolean = BuildConfig.HF_TOKEN_DEFAULT.isNotBlank() + companion object { private const val PREFS_NAME = "urlvault_local_models_encrypted" private const val KEY_CUSTOM_ENTRIES = "custom_entries" diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt index 5847593..ce2f491 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/LocalModelRouter.kt @@ -3,6 +3,9 @@ package com.jaeckel.urlvault.android.ai import android.util.Log import com.jaeckel.urlvault.ai.LocalModelProvider import com.jaeckel.urlvault.ai.LocalModelRegistry +import com.jaeckel.urlvault.ai.ModelRuntime +import com.jaeckel.urlvault.android.BuildConfig +import kotlinx.coroutines.channels.BufferOverflow import kotlinx.coroutines.flow.MutableSharedFlow import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.SharedFlow @@ -51,9 +54,33 @@ class LocalModelRouter( override val readiness: List<Pair<String, Boolean>>, val reason: String, ) : RouteEvent() + + /** + * Fired by `generateXxx` *after* the provider call returns or throws.
+ * Carries the wall-clock duration so a UI status line can show + * "tags via Liquid LFM2 Extract — 1247 ms". Note that for `title` on + * pages with a usable `<title>`/`og:title`, no LLM ran — duration + * reflects only the page fetch, which is intentional. + */ + data class Completed( + override val action: String, + override val activeIds: Set<String>, + override val readiness: List<Pair<String, Boolean>>, + val providerId: String, + val providerName: String, + val durationMs: Long, + val success: Boolean, + ) : RouteEvent() } - private val _events = MutableSharedFlow<RouteEvent>(extraBufferCapacity = 16) + // DROP_OLDEST so a slow / backgrounded collector can never stall the + // generate path or silently lose the latest event. The UI only cares + // about *current* state, so dropping older Picked/Completed pairs is + // safer than letting tryEmit return false for the most recent one. + private val _events = MutableSharedFlow<RouteEvent>( + extraBufferCapacity = 16, + onBufferOverflow = BufferOverflow.DROP_OLDEST, + ) val events: SharedFlow<RouteEvent> = _events.asSharedFlow() /** @@ -109,33 +136,6 @@ class LocalModelRouter( return PickResult(fallback, reason, active, readinessSummary) } - private suspend fun pickAndEmit(action: String): LocalModelProvider? { - val result = pickWithReason() - val provider = result.provider - if (provider != null) { - _events.tryEmit( - RouteEvent.Picked( - action = action, - activeIds = result.activeIds, - readiness = result.readiness, - providerId = provider.id, - providerName = provider.displayName, - reason = result.reason, - ), - ) - } else { - _events.tryEmit( - RouteEvent.None( - action = action, - activeIds = result.activeIds, - readiness = result.readiness, - reason = result.reason, - ), - ) - } - return provider - } - /** * Whether at least one registered provider can serve a request right now.
* Used by the UI to decide whether to drive bookmark generation through @@ -181,20 +181,147 @@ class LocalModelRouter( } suspend fun generateTags(url: String, title: String, content: String): Result<List<String>> { - val provider = pickAndEmit("tags") - ?: return Result.failure(IllegalStateException("No ready local AI model")) - return provider.generateTags(url, title, content) + val pick = pickWithReason() + val provider = pick.provider + if (provider == null) { + emitNone("tags", pick) + return Result.failure(IllegalStateException("No ready local AI model")) + } + emitPicked("tags", provider, pick) + val t0 = System.nanoTime() + val result = runTimed("tags", provider, pick) { + provider.generateTags(url, title, content) + } + val durationMs = (System.nanoTime() - t0) / 1_000_000 + // DEBUG-only: append a synthetic tag of the form + // `<sdk>:<model>:<duration>` (e.g. `leap:lfm2-1.2b-extract:2.34s`) + // so a glance at the saved bookmark tells you SDK, model variant, + // and how long generation took. Stripped in release builds so + // synced Bitwarden entries never carry the marker into production. + return if (BuildConfig.DEBUG) { + result.map { it + debugProvenanceTag(provider, durationMs) } + } else { + result + } + } + + private fun debugProvenanceTag(provider: LocalModelProvider, durationMs: Long): String { + val sdk = when (provider.runtime) { + ModelRuntime.ML_KIT -> "aicore" + ModelRuntime.LLAMA_CPP -> "llama" + ModelRuntime.LEAP -> "leap" + ModelRuntime.MEDIAPIPE -> "liteRt" + } + // For LiteRT-LM, append the backend label the SDK actually picked + // (NPU/GPU/CPU) so the saved bookmark answers "did acceleration + // engage?" without having to grep logcat. The other runtimes don't + // expose a comparable concept (AICore is system-managed, llama.cpp + // and Leap are CPU-only here), so the suffix only fires for LiteRT. + val backendSuffix = (provider as? 
LiteRtLmModelProvider) + ?.currentBackendLabel() + ?.let { "[$it]" } + .orEmpty() + // provider.id is `<runtime-prefix>:<model-id>` (e.g. + // `leap:lfm2-1.2b-extract`); strip the prefix so we can substitute + // the shorter SDK name without duplicating the runtime label. + val model = provider.id.substringAfter(':', missingDelimiterValue = provider.id) + // ms below 1s, two-decimal seconds above. Avoids `String.format` + // (host-locale-dependent) by doing the math directly. + val duration = if (durationMs < 1000) { + "${durationMs}ms" + } else { + val whole = durationMs / 1000 + val hundredths = (durationMs % 1000) / 10 + val padded = if (hundredths < 10) "0$hundredths" else "$hundredths" + "$whole.${padded}s" + } + return "$sdk$backendSuffix:$model:$duration" } suspend fun generateDescription(url: String, title: String): Result<String> { - val provider = pickAndEmit("description") - ?: return Result.failure(IllegalStateException("No ready local AI model")) - return provider.generateDescription(url, title) + val pick = pickWithReason() + val provider = pick.provider + if (provider == null) { + emitNone("description", pick) + return Result.failure(IllegalStateException("No ready local AI model")) + } + emitPicked("description", provider, pick) + return runTimed("description", provider, pick) { provider.generateDescription(url, title) } } suspend fun generateTitle(url: String): Result<String> { - val provider = pickAndEmit("title") - ?: return Result.failure(IllegalStateException("No ready local AI model")) - return provider.generateTitle(url) + val pick = pickWithReason() + val provider = pick.provider + if (provider == null) { + emitNone("title", pick) + return Result.failure(IllegalStateException("No ready local AI model")) + } + emitPicked("title", provider, pick) + return runTimed("title", provider, pick) { provider.generateTitle(url) } + } + + private fun emitPicked(action: String, provider: LocalModelProvider, pick: PickResult) { + _events.tryEmit( + 
+ RouteEvent.Picked( + action = action, + activeIds = pick.activeIds, + readiness = pick.readiness, + providerId = provider.id, + providerName = provider.displayName, + reason = pick.reason, + ), + ) + } + + private fun emitNone(action: String, pick: PickResult) { + _events.tryEmit( + RouteEvent.None( + action = action, + activeIds = pick.activeIds, + readiness = pick.readiness, + reason = pick.reason, + ), + ) + } + + /** + * Times [block] and emits a [RouteEvent.Completed] regardless of how it + * exits — normal `Result` (success or failure), or thrown exception + * (notably a coroutine `CancellationException` that escapes the provider; + * note that stdlib `runCatching` captures it as a failed `Result` rather + * than re-raising it, so only cancellation thrown outside such a wrapper + * arrives here as a throw). Without + * the try/finally, a cancellation would leave the UI strip stuck in + * "Running…" forever. + * + * `inline` is what lets the non-suspending `block` parameter actually call + * suspending provider methods — the lambda body is inlined into this + * `suspend` function's body, so it runs in a suspending context. + * + * `nanoTime` is monotonic; `currentTimeMillis` is wall-clock and can jump + * backwards on NTP / manual clock changes, producing negative durations.
+ */ + private suspend inline fun <T> runTimed( + action: String, + provider: LocalModelProvider, + pick: PickResult, + block: () -> Result<T>, + ): Result<T> { + val t0 = System.nanoTime() + var success = false + try { + val result = block() + success = result.isSuccess + return result + } finally { + _events.tryEmit( + RouteEvent.Completed( + action = action, + activeIds = pick.activeIds, + readiness = pick.readiness, + providerId = provider.id, + providerName = provider.displayName, + durationMs = (System.nanoTime() - t0) / 1_000_000, + success = success, + ), + ) + } } } diff --git a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt index 0515e2d..bb49a68 100644 --- a/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt +++ b/androidApp/src/main/kotlin/com/jaeckel/urlvault/android/ai/ModelDownloadManager.kt @@ -290,14 +290,16 @@ class ModelDownloadManager( * Probe the server for the file's total size without downloading the * whole thing. Sends `Range: bytes=0-0` (a 1-byte slice) so the response * carries a `Content-Range: bytes 0-0/<total>` header we can parse. - * Follows the same manual-redirect chain as openWithRedirects to keep - * the Authorization header attached on CDN redirects. Returns -1 if the + * Follows the same manual-redirect chain as openWithRedirects, mirroring + * its same-host Authorization rule (see notes there). Returns -1 if the * server doesn't report a total (e.g. on a non-Range-capable origin). 
*/ private fun discoverTotalBytes(urlString: String, token: String?, maxHops: Int = 5): Long { + val originalHost = URL(urlString).host var url = URL(urlString) var hops = 0 while (true) { + val sameHost = url.host.equals(originalHost, ignoreCase = true) val conn = (url.openConnection() as HttpURLConnection).apply { requestMethod = "GET" connectTimeout = 30_000 @@ -305,7 +307,9 @@ class ModelDownloadManager( instanceFollowRedirects = false setRequestProperty("User-Agent", "URLVault/1.0") setRequestProperty("Range", "bytes=0-0") - if (!token.isNullOrBlank()) setRequestProperty("Authorization", "Bearer $token") + if (sameHost && !token.isNullOrBlank()) { + setRequestProperty("Authorization", "Bearer $token") + } } try { val code = conn.responseCode @@ -338,9 +342,13 @@ class ModelDownloadManager( } /** - * Follow up to 5 redirects manually so we re-apply the Authorization / - * Range headers on each hop (HttpURLConnection's automatic redirect - * stripping would otherwise drop them). + * Follow up to 5 redirects manually so we re-apply the Range header on + * each hop (HttpURLConnection would otherwise drop it). The Authorization + * header is only attached while we are still on the **original host** — + * Hugging Face 302s gated downloads to a pre-signed CDN URL on + * `cas-bridge.xethub.hf.co` (and similar), and that CDN rejects extra + * `Authorization: Bearer …` headers with HTTP 401. Browsers and curl + * drop auth across origins for exactly the same reason. 
*/ private fun openWithRedirects( urlString: String, @@ -348,9 +356,11 @@ class ModelDownloadManager( token: String?, maxHops: Int = 5, ): OpenResult { + val originalHost = URL(urlString).host var url = URL(urlString) var hops = 0 while (true) { + val sameHost = url.host.equals(originalHost, ignoreCase = true) val conn = (url.openConnection() as HttpURLConnection).apply { requestMethod = "GET" connectTimeout = 30_000 @@ -358,7 +368,9 @@ class ModelDownloadManager( instanceFollowRedirects = false setRequestProperty("User-Agent", "URLVault/1.0") if (rangeStart > 0) setRequestProperty("Range", "bytes=$rangeStart-") - if (!token.isNullOrBlank()) setRequestProperty("Authorization", "Bearer $token") + if (sameHost && !token.isNullOrBlank()) { + setRequestProperty("Authorization", "Bearer $token") + } } val code = conn.responseCode when (code) { diff --git a/shared/build.gradle.kts b/shared/build.gradle.kts index 1a558aa..a2ab916 100644 --- a/shared/build.gradle.kts +++ b/shared/build.gradle.kts @@ -37,6 +37,7 @@ kotlin { implementation(compose.runtime) implementation(compose.foundation) implementation(compose.material3) + implementation(compose.materialIconsExtended) implementation(compose.ui) implementation(compose.components.resources) implementation(compose.components.uiToolingPreview) diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt index 16fe6d2..bec8c1e 100644 --- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt +++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AddEditBookmarkScreen.kt @@ -96,8 +96,15 @@ fun AddEditBookmarkScreen( val TAG = "AddEditBookmarkScreen" - // Track which URL we've already triggered AI for, to prevent re-triggering + // Track which URL we've already triggered AI for, to prevent re-triggering. 
+ // The mode component matters because the share-intent LaunchedEffect + // re-keys on `aiCoreEnabled`: when the AI master toggle flips on after + // a startup race, we *want* to re-trigger (legacy → AI), but only that + // once. Without the mode check, a `force = true` would re-fire even on + // unrelated recompositions, producing duplicate description / tags + // generations (and two debug provenance tags in the saved bookmark). var aiTriggeredForUrl by remember { mutableStateOf<String?>(null) } + var aiTriggeredMode by remember { mutableStateOf<String?>(null) } // Helper to normalize and validate URL for AI triggering fun normalizeUrlForAi(rawUrl: String): String? { @@ -112,12 +119,18 @@ fun AddEditBookmarkScreen( // Helper to trigger AI/autotag for a given URL fun triggerAiForUrl(targetUrl: String, force: Boolean = false) { - Logger.d(TAG, "triggerAiForUrl($targetUrl, force=$force)") - if (!force && aiTriggeredForUrl == targetUrl) { - Logger.d(TAG, "Already triggered for $targetUrl") + val desiredMode = if (aiCoreEnabled) "ai" else "legacy" + Logger.d(TAG, "triggerAiForUrl($targetUrl, force=$force, mode=$desiredMode)") + // Dedup on (URL, mode). Same URL + same mode is a no-op so unrelated + // recompositions don't re-fire the AI flow. Same URL + different mode + // (legacy → AI when the master toggle flips on after the startup + // race) IS a legitimate retrigger and falls through. 
+ if (!force && aiTriggeredForUrl == targetUrl && aiTriggeredMode == desiredMode) { + Logger.d(TAG, "Already triggered for $targetUrl in $desiredMode mode") return } aiTriggeredForUrl = targetUrl + aiTriggeredMode = desiredMode // If AI is available and enabled, use it for title/desc/tags if (aiCoreEnabled) { @@ -236,6 +249,16 @@ fun AddEditBookmarkScreen( is AIGenerationState.Error -> { aiDescriptionError = aiDescriptionState.message onAiDescriptionConsumed() + // Description failed — but tags are an independent extraction + // and often succeed on the same input (observed: LEAP returned + // degenerate punctuation as the description while producing + // clean tags for the same URL). Fire tags from URL + title + // alone instead of giving up entirely. + val currentTarget = normalizeUrlForAi(url) + if (aiCoreEnabled && onAiGenerateTags != null && currentTarget != null) { + aiTagError = null + onAiGenerateTags(currentTarget, title, "") + } } else -> {} } @@ -264,8 +287,18 @@ fun AddEditBookmarkScreen( } } - // Auto-trigger once for prefilled URLs (share intent). - LaunchedEffect(prefilledUrl) { + // Auto-trigger for prefilled URLs (share intent). Keyed on + // `aiCoreEnabled` as well as `prefilledUrl` so the startup race — + // share intent fires before `anyProviderReady`'s async readiness + // probe has finished, so `aiCoreEnabled` is briefly false and the + // first trigger ends up on the legacy branch — gets corrected once + // AI flips on. `triggerAiForUrl`'s mode-aware dedup handles both + // cases cleanly: legacy → AI is a real mode change so it re-fires; + // a stable-true aiCoreEnabled across recompositions is the same + // mode and is deduped. The legacy result-handling LaunchedEffects' + // `if (!aiCoreEnabled)` guards already prevent stale legacy results + // from clobbering the AI values when this flip happens. 
+ LaunchedEffect(prefilledUrl, aiCoreEnabled) { if (!isEditMode && prefilledUrl != null) { val targetUrl = normalizeUrlForAi(prefilledUrl) if (targetUrl != null) { diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt new file mode 100644 index 0000000..8dd765b --- /dev/null +++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/AiActivityStatusLine.kt @@ -0,0 +1,149 @@ +package com.jaeckel.urlvault.ui + +import androidx.compose.animation.AnimatedVisibility +import androidx.compose.animation.fadeIn +import androidx.compose.animation.fadeOut +import androidx.compose.animation.slideInVertically +import androidx.compose.animation.slideOutVertically +import androidx.compose.foundation.layout.Arrangement +import androidx.compose.foundation.layout.Row +import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.padding +import androidx.compose.foundation.layout.size +import androidx.compose.material3.CircularProgressIndicator +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Surface +import androidx.compose.material3.Text +import androidx.compose.runtime.Composable +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.unit.dp + +/** + * State for the bottom AI-activity strip. Replaces the debug Toast that + * used to surface router decisions on every `generateXxx` call. Auto-hide + * is a presentation concern and lives in the caller — this composable just + * renders whatever it's told to. + */ +sealed class AiActivityState { + data object Hidden : AiActivityState() + + /** A provider was picked; inference is in flight. Shows a spinner. 
*/ + data class Running( + val action: String, + val providerName: String, + ) : AiActivityState() + + /** Inference finished. Shows the wall-clock duration. */ + data class Completed( + val action: String, + val providerName: String, + val durationMs: Long, + val success: Boolean, + ) : AiActivityState() + + /** Router could not pick a provider — UI surfaces the reason. */ + data class NoProvider( + val action: String, + val reason: String, + ) : AiActivityState() +} + +/** + * Slim auto-hiding strip rendered at the bottom of the app. Designed as the + * non-obstructive replacement for the debug Toast spam: a single line that + * slides up while AI work is in flight, then briefly shows the timing, then + * slides away. + * + * Add it as the **last child of your screen's Column** (with the screen + * content above it given `Modifier.weight(1f)`) so it claims real layout + * space when visible and pushes content up. Putting it in an overlaying + * `Box` will reintroduce the obscuring behaviour the original Toast had — + * the whole point of this strip is that buttons stay reachable while it's + * showing. + * + * Auto-hide of [AiActivityState.Completed] / [AiActivityState.NoProvider] is + * the caller's responsibility — use a `LaunchedEffect(state)` with a `delay` + * and reset to [AiActivityState.Hidden]. 
+ */ +@Composable +fun AiActivityStatusLine( + state: AiActivityState, + modifier: Modifier = Modifier, +) { + AnimatedVisibility( + visible = state !is AiActivityState.Hidden, + enter = fadeIn() + slideInVertically(initialOffsetY = { it }), + exit = fadeOut() + slideOutVertically(targetOffsetY = { it }), + modifier = modifier, + ) { + val (text, isRunning, isError) = when (state) { + is AiActivityState.Running -> Triple( + "${state.action}: ${state.providerName}…", + true, + false, + ) + is AiActivityState.Completed -> Triple( + buildString { + append(state.action) + append(" via ") + append(state.providerName) + append(" — ") + append(formatMs(state.durationMs)) + if (!state.success) append(" (failed)") + }, + false, + !state.success, + ) + is AiActivityState.NoProvider -> Triple( + "${state.action}: no model ready (${state.reason})", + false, + true, + ) + // Hidden never reached here — AnimatedVisibility hides the slot. + AiActivityState.Hidden -> Triple("", false, false) + } + + Surface( + color = if (isError) MaterialTheme.colorScheme.errorContainer + else MaterialTheme.colorScheme.surfaceVariant, + contentColor = if (isError) MaterialTheme.colorScheme.onErrorContainer + else MaterialTheme.colorScheme.onSurfaceVariant, + tonalElevation = 4.dp, + shadowElevation = 4.dp, + ) { + Row( + modifier = Modifier + .fillMaxWidth() + .padding(horizontal = 12.dp, vertical = 6.dp), + horizontalArrangement = Arrangement.spacedBy(8.dp), + verticalAlignment = Alignment.CenterVertically, + ) { + if (isRunning) { + CircularProgressIndicator( + modifier = Modifier.size(14.dp), + strokeWidth = 2.dp, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Text( + text = text, + style = MaterialTheme.typography.bodySmall, + fontFamily = FontFamily.Monospace, + color = Color.Unspecified, + ) + } + } + } +} + +private fun formatMs(ms: Long): String { + if (ms < 1000) return "$ms ms" + // Two decimal places without depending on String.format (not in commonMain). 
+ val whole = ms / 1000 + val hundredths = (ms % 1000) / 10 + val padded = if (hundredths < 10) "0$hundredths" else "$hundredths" + return "$whole.$padded s" +} diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt index 387f199..cf103cd 100644 --- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt +++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/BookmarkListScreen.kt @@ -17,6 +17,9 @@ import androidx.compose.foundation.layout.width import androidx.compose.foundation.lazy.LazyColumn import androidx.compose.foundation.lazy.LazyRow import androidx.compose.foundation.lazy.items +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.Settings +import androidx.compose.material.icons.filled.Sync import androidx.compose.material3.Card import androidx.compose.material3.CardDefaults import androidx.compose.material3.CircularProgressIndicator @@ -117,17 +120,17 @@ fun BookmarkListScreen( strokeWidth = 2.dp ) } else { - Text( - text = "\uD83D\uDD04", - style = MaterialTheme.typography.titleMedium + Icon( + imageVector = Icons.Default.Sync, + contentDescription = "Sync with Bitwarden", ) } } // Settings button IconButton(onClick = onOpenSettings) { - Text( - text = "\u2699\uFE0F", - style = MaterialTheme.typography.titleMedium + Icon( + imageVector = Icons.Default.Settings, + contentDescription = "Settings", ) } } diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt index 20cac0d..9e41f14 100644 --- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt +++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/ModelComparisonScreen.kt @@ -35,6 +35,7 @@ import androidx.compose.ui.text.font.FontFamily import androidx.compose.ui.text.font.FontWeight import 
androidx.compose.ui.unit.dp import com.jaeckel.urlvault.ai.ModelComparisonRunner +import com.jaeckel.urlvault.ai.ModelRuntime import kotlinx.coroutines.launch @OptIn(ExperimentalMaterial3Api::class) @@ -212,7 +213,7 @@ private fun ProviderResultCard(result: ModelComparisonRunner.ProviderResult) { fontWeight = FontWeight.SemiBold, ) Text( - text = result.runtime.name, + text = runtimeLabel(result.runtime), style = MaterialTheme.typography.labelSmall, color = MaterialTheme.colorScheme.onSurfaceVariant, ) @@ -270,3 +271,16 @@ private fun ResultLine(label: String, value: String, ms: Long) { ) } } + +/** + * Human-friendly label for a runtime. The enum name `MEDIAPIPE` is a + * historical leftover from when the LiteRT-LM bundle was loaded via + * MediaPipe-LLM; the actual runtime today is LiteRT-LM, so render it that + * way in the UI rather than leaking the enum constant. + */ +private fun runtimeLabel(runtime: ModelRuntime): String = when (runtime) { + ModelRuntime.ML_KIT -> "AICore" + ModelRuntime.LLAMA_CPP -> "llama.cpp" + ModelRuntime.LEAP -> "Leap" + ModelRuntime.MEDIAPIPE -> "LiteRT-LM" +} diff --git a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt index e3c8fb0..05e45a3 100644 --- a/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt +++ b/shared/src/commonMain/kotlin/com/jaeckel/urlvault/ui/SettingsScreen.kt @@ -78,6 +78,9 @@ fun SettingsScreen( onDeleteModel: (ModelCatalogEntry) -> Unit = {}, onToggleModelActive: (ModelCatalogEntry, Boolean) -> Unit = { _, _ -> }, onAddCustomModel: (hfRepo: String, hfFile: String, displayName: String) -> Unit = { _, _, _ -> }, + hfToken: String = "", + hfTokenFromBuild: Boolean = false, + onHfTokenChanged: (String) -> Unit = {}, onOpenComparison: () -> Unit = {}, onSaveCredentials: (BitwardenCredentials) -> Unit, onNavigateBack: () -> Unit, @@ -438,6 +441,12 @@ fun SettingsScreen( } } + HuggingFaceTokenRow( + 
token = hfToken, + fromBuild = hfTokenFromBuild, + onTokenChanged = onHfTokenChanged, + ) + CustomModelEntryRow(onAdd = onAddCustomModel) Button( @@ -690,6 +699,86 @@ private fun ModelCatalogRow( } } +/** + * Lets the user paste a Hugging Face access token so the downloader can + * fetch gated repos (most LiteRT-LM Gemma bundles, FunctionGemma, etc.). + * Acceptance of each model's licence on huggingface.co is also required — + * the token alone doesn't grant access. + */ +@Composable +private fun HuggingFaceTokenRow( + token: String, + fromBuild: Boolean, + onTokenChanged: (String) -> Unit, +) { + // When neither a user-saved nor a build-time token exists, default the + // row to expanded so the user is nudged to enter one. + var expanded by remember(token, fromBuild) { + mutableStateOf(token.isBlank() && !fromBuild) + } + val masked = if (token.isBlank()) "" else token.take(4) + "…" + token.takeLast(4) + + Column( + modifier = Modifier.fillMaxWidth(), + verticalArrangement = Arrangement.spacedBy(4.dp), + ) { + Row( + modifier = Modifier.fillMaxWidth(), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.SpaceBetween, + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Hugging Face token", style = MaterialTheme.typography.bodyLarge) + Text( + text = when { + token.isNotBlank() -> "Saved: $masked" + fromBuild -> "Using token bundled with this build" + else -> "Required for gated models (Gemma, FunctionGemma)" + }, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + } + Switch(checked = expanded, onCheckedChange = { expanded = it }) + } + if (expanded) { + var draft by remember(token) { mutableStateOf(token) } + OutlinedTextField( + value = draft, + onValueChange = { draft = it }, + label = { Text("hf_… (read access)") }, + singleLine = true, + modifier = Modifier.fillMaxWidth(), + ) + Text( + text = "Create one at huggingface.co/settings/tokens, then accept each gated " + 
+ "model's licence on its page (e.g. google/gemma-3-1b-it).", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + ) + Row(horizontalArrangement = Arrangement.spacedBy(8.dp)) { + Button( + onClick = { + onTokenChanged(draft.trim()) + expanded = false + }, + enabled = draft.trim() != token, + modifier = Modifier.weight(1f), + ) { Text(if (token.isBlank()) "Save token" else "Update token") } + if (token.isNotBlank()) { + Button( + onClick = { + onTokenChanged("") + expanded = false + }, + modifier = Modifier.weight(1f), + ) { Text("Clear") } + } + } + } + } +} + @Composable private fun CustomModelEntryRow( onAdd: (hfRepo: String, hfFile: String, displayName: String) -> Unit,