diff --git a/src-tauri/src/asr/moonshine.rs b/src-tauri/src/asr/moonshine.rs
index df1e59b..201c159 100644
--- a/src-tauri/src/asr/moonshine.rs
+++ b/src-tauri/src/asr/moonshine.rs
@@ -673,7 +673,7 @@ impl MoonshineBackend {
             let arr = if use_cache {
                 kv.values[idx]
                     .as_ref()
-                    .ok_or_else(|| anyhow!("KV cache slot {} unpopulated under use_cache", idx))?
+                    .ok_or_else(|| anyhow!("KV cache slot {idx} unpopulated under use_cache"))?
                     .clone()
             } else {
                 let resolved_shape: Vec<usize> = past
@@ -736,7 +736,7 @@ impl MoonshineBackend {
             .map_err(|e| anyhow!("ort extract logits: {e}"))?;
         let shape = logits_view.shape().to_vec();
         if shape.len() != 3 || shape[0] != 1 {
-            return Err(anyhow!("unexpected decoder logits shape {:?}", shape));
+            return Err(anyhow!("unexpected decoder logits shape {shape:?}"));
         }
         let last = shape[1] - 1;
         let vocab = shape[2];
diff --git a/src-tauri/src/asr/parakeet.rs b/src-tauri/src/asr/parakeet.rs
index d810e05..db156f6 100644
--- a/src-tauri/src/asr/parakeet.rs
+++ b/src-tauri/src/asr/parakeet.rs
@@ -127,7 +127,7 @@ impl AsrBackend for ParakeetBackend {
                 .map_err(|e| anyhow!("ort threads: {e}"))?
                 .commit_from_file(&model_path_owned)
                 .map_err(|e| anyhow!("loading {}: {e}", model_path_owned.display()))
-                .with_context(|| format!("warm_up parakeet {}", model_name_owned))
+                .with_context(|| format!("warm_up parakeet {model_name_owned}"))
         })?;
 
         // Sniff I/O names. NeMo's istupakov export uses
diff --git a/src-tauri/src/cli.rs b/src-tauri/src/cli.rs
index fd69db0..a13bd85 100644
--- a/src-tauri/src/cli.rs
+++ b/src-tauri/src/cli.rs
@@ -1052,13 +1052,10 @@ async fn cmd_fetch_onnxruntime() -> Result<()> {
             let pct = (bytes as f64 / total as f64 * 100.0) as u64;
             let _ = write!(
                 std::io::stderr(),
-                "\r  {:>3}%  {:>10} / {:>10} bytes",
-                pct,
-                bytes,
-                total
+                "\r  {pct:>3}%  {bytes:>10} / {total:>10} bytes"
             );
         } else {
-            let _ = write!(std::io::stderr(), "\r  {:>10} bytes", bytes);
+            let _ = write!(std::io::stderr(), "\r  {bytes:>10} bytes");
         }
         let _ = std::io::stderr().flush();
     });
diff --git a/src-tauri/src/diarize/embedder.rs b/src-tauri/src/diarize/embedder.rs
index 88d7373..2e9b473 100644
--- a/src-tauri/src/diarize/embedder.rs
+++ b/src-tauri/src/diarize/embedder.rs
@@ -234,10 +234,7 @@ impl Embedder {
         // is correct for both shapes since the leading axes are 1.
         let shape = view.shape().to_vec();
         if shape.last().copied().unwrap_or(0) == 0 {
-            return Err(anyhow!(
-                "embedder produced zero-length output ({:?})",
-                shape
-            ));
+            return Err(anyhow!("embedder produced zero-length output ({shape:?})"));
         }
         let mut out: Vec<f32> = view.iter().copied().collect();
         l2_normalize(&mut out);
diff --git a/src-tauri/src/diarize/segmenter.rs b/src-tauri/src/diarize/segmenter.rs
index 27a09f0..4c323da 100644
--- a/src-tauri/src/diarize/segmenter.rs
+++ b/src-tauri/src/diarize/segmenter.rs
@@ -227,8 +227,7 @@ impl Segmenter {
         let shape = logits.shape().to_vec();
         if shape.len() != 3 || shape[0] != 1 || shape[2] != 7 {
             return Err(anyhow!(
-                "unexpected segmenter output shape {:?} (want [1, T, 7])",
-                shape
+                "unexpected segmenter output shape {shape:?} (want [1, T, 7])"
             ));
         }
         let t_frames = shape[1];
diff --git a/src-tauri/src/hardware.rs b/src-tauri/src/hardware.rs
index 90ff2ad..e471262 100644
--- a/src-tauri/src/hardware.rs
+++ b/src-tauri/src/hardware.rs
@@ -177,6 +177,7 @@ fn read_proc_meminfo_total_gb() -> Option<f64> {
 }
 
 /// Pulled out for testability. Reads the `MemTotal: NNN kB` line.
+#[cfg_attr(not(target_os = "linux"), allow(dead_code))] // Linux detect() + tests only
 fn parse_meminfo_total_kb(content: &str) -> Option<u64> {
     for line in content.lines() {
         if let Some(rest) = line.strip_prefix("MemTotal:") {
@@ -262,6 +263,7 @@ fn df_k_gb(path: &str) -> Option<f64> {
 /// rows; field 4 is `Available` in 1K blocks. Some `df` flavours wrap long
 /// device names onto a second line, so the available column may be on the
 /// row after the device name. Find the first row with a numeric column 4.
+#[cfg_attr(not(target_os = "linux"), allow(dead_code))] // Linux detect() + tests only
 fn parse_df_avail_kb(out: &str) -> Option<u64> {
     for line in out.lines().skip(1) {
         let parts: Vec<&str> = line.split_whitespace().collect();
@@ -303,6 +305,7 @@ fn detect_soc_label() -> Option<String> {
     None
 }
 
+#[cfg_attr(not(target_os = "linux"), allow(dead_code))] // Linux detect() + tests only
 fn parse_device_tree_model(raw: &[u8]) -> Option<String> {
     // Trim trailing NUL bytes the kernel attaches to the device-tree string.
     let end = raw.iter().position(|b| *b == 0).unwrap_or(raw.len());
@@ -313,6 +316,7 @@ fn parse_device_tree_model(raw: &[u8]) -> Option<String> {
     Some(s.to_string())
 }
 
+#[cfg_attr(not(target_os = "linux"), allow(dead_code))] // Linux detect() + tests only
 fn parse_cpuinfo_model(content: &str) -> Option<String> {
     // ARM kernels emit `Model : Raspberry Pi 5 Model B Rev 1.0` and/or
     // `Hardware : BCM2712`. Prefer the human-friendly Model line.
diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs
index 321d46e..35a04e8 100644
--- a/src-tauri/src/main.rs
+++ b/src-tauri/src/main.rs
@@ -73,6 +73,25 @@ async fn ollama_list_models() -> Result<Vec<ollama::ModelInfo>, String> {
     ollama::list_models().await.map_err(|e| e.to_string())
 }
 
+/// Tauri command: reports whether `model` is already resident in Ollama's
+/// memory, by delegating to `ollama::is_model_loaded`. The chat UI uses it
+/// to paint the load dialog *before* firing a request, since a cold load can
+/// thrash the machine hard enough that a delayed dialog never renders.
+/// Best-effort: `false` also covers an unreachable Ollama or a bad reply.
+#[tauri::command]
+async fn ollama_model_loaded(model: String) -> bool {
+    ollama::is_model_loaded(&model).await
+}
+
+/// Tauri command: ensures the (throttled) Ollama server is running, then
+/// preloads `model` so the one-time cold load happens at a predictable moment
+/// (e.g. just after launch), not mid-chat. Either failure becomes a string.
+#[tauri::command]
+async fn ollama_warm(model: String) -> Result<(), String> {
+    ollama::ensure_running().await.map_err(|e| e.to_string())?;
+    ollama::warm(&model).await.map_err(|e| e.to_string())
+}
+
 #[tauri::command]
 async fn ollama_delete_model(name: String) -> Result<(), String> {
     ollama::delete_model(&name).await.map_err(|e| e.to_string())
@@ -632,7 +651,7 @@ fn mesh_file_save_at(path: String, bytes_b64: String) -> Result<(), String> {
     }
     let target = std::path::PathBuf::from(&path);
     if target.is_dir() {
-        return Err(format!("target {} is a directory", path));
+        return Err(format!("target {path} is a directory"));
     }
     // Best-effort: make sure the parent directory exists. The
     // save-dialog typically lands the user inside an existing folder,
@@ -1005,6 +1024,8 @@ fn main() {
             ollama_install,
             ollama_stop,
             ollama_list_models,
+            ollama_model_loaded,
+            ollama_warm,
             ollama_delete_model,
             preload_modes,
             ensure_tracked_models,
diff --git a/src-tauri/src/mesh/daemon.rs b/src-tauri/src/mesh/daemon.rs
index 033c893..53fc88a 100644
--- a/src-tauri/src/mesh/daemon.rs
+++ b/src-tauri/src/mesh/daemon.rs
@@ -532,25 +532,28 @@ impl Drop for DaemonChild {
 /// dev cycle rewrites with fresh content instead of perpetually
 /// staging corrupt bits. Files at paths we DON'T own (PATH
 /// lookup, env-var override) just get skipped without deletion.
-fn looks_like_executable(path: &Path) -> bool {
+///
+/// Validates a candidate daemon binary: `Ok(())` when usable, otherwise
+/// `Err(reason)` saying why it was rejected. Removes a stale *owned* slot
+/// (self-heal) but never logs — callers collect the reasons and print them
+/// only if the whole search fails (see `ensure_daemon_running`).
+fn check_executable(path: &Path) -> Result<(), String> {
     match validate_path_is_executable(path) {
-        Ok(()) => true,
+        Ok(()) => Ok(()),
         Err(reason) => {
-            // Self-heal: if this is a known-owned slot and the
-            // content is invalid, delete it. Tauri's externalBin
-            // staging will regenerate from the source on the
-            // next build; the source's own validator catches
-            // problems before propagating them.
+            // Self-heal: delete invalid content in a known-owned slot; Tauri's
+            // externalBin staging regenerates it from the source on the next
+            // build, and the source's own validator catches problems first.
+            // Removal is best-effort: its result is ignored, not verified.
             if is_owned_slot(path) {
-                eprintln!(
-                    "daemon: {} failed executable check ({reason}); removing stale file",
-                    path.display()
-                );
                 let _ = std::fs::remove_file(path);
+                Err(format!(
+                    "{} failed executable check ({reason}); removed stale file",
+                    path.display()
+                ))
             } else {
-                eprintln!("daemon: skipping {} ({reason})", path.display());
+                Err(format!("skipping {} ({reason})", path.display()))
             }
-            false
         }
     }
 }
@@ -576,7 +579,7 @@ fn validate_path_is_executable(path: &Path) -> Result<(), String> {
         0xFEED_FACE | 0xFEED_FACF | 0xCAFE_BABE | 0xBEBA_FECA
     );
     if !(pe || elf || macho) {
-        return Err(format!("bad magic {:02x?}", head));
+        return Err(format!("bad magic {head:02x?}"));
     }
     if pe {
         f.seek(SeekFrom::Start(0x3C))
@@ -623,6 +626,14 @@ fn is_owned_slot(path: &Path) -> bool {
 }
 
 pub fn daemon_binary_candidates() -> Vec<PathBuf> {
+    daemon_binary_candidates_diag().0
+}
+
+/// Like [`daemon_binary_candidates`] but also returns the human-readable
+/// reason each rejected path was skipped. `ensure_daemon_running` holds
+/// these and only prints them if the whole search fails, so a successful
+/// launch doesn't spam the log with every probed-and-skipped location.
+fn daemon_binary_candidates_diag() -> (Vec<PathBuf>, Vec<String>) {
     let exe = if cfg!(windows) {
         "myownmesh.exe"
     } else {
@@ -633,23 +644,25 @@ pub fn daemon_binary_candidates() -> Vec<PathBuf> {
     // sidecars next to the dev exe; `tauri build` strips it.
     // Checking both covers dev + production from one runtime path.
     let exe_with_triple = if cfg!(windows) {
-        format!("myownmesh-{}.exe", DAEMON_SIDECAR_TRIPLE)
+        format!("myownmesh-{DAEMON_SIDECAR_TRIPLE}.exe")
     } else {
-        format!("myownmesh-{}", DAEMON_SIDECAR_TRIPLE)
+        format!("myownmesh-{DAEMON_SIDECAR_TRIPLE}")
     };
     let mut out: Vec<PathBuf> = Vec::new();
+    let mut diags: Vec<String> = Vec::new();
 
     // Helper: push a candidate iff it exists AND looks like a
     // real executable (filters out the zero-byte stub
     // `build.rs` writes when the daemon fetch was skipped, AND
     // filters out corrupt / truncated downloads that would
     // otherwise produce a confusing "%1 is not a valid Win32
-    // application" error when we try to spawn them).
-    fn push_if_usable(out: &mut Vec<PathBuf>, p: PathBuf) {
-        if !looks_like_executable(&p) {
-            return;
+    // application" error when we try to spawn them). Rejection
+    // reasons are collected into `diags` rather than logged here.
+    fn push_if_usable(out: &mut Vec<PathBuf>, diags: &mut Vec<String>, p: PathBuf) {
+        match check_executable(&p) {
+            Ok(()) => out.push(p),
+            Err(reason) => diags.push(reason),
         }
-        out.push(p);
     }
 
     // 1. Bundled sidecar next to the running LLM executable —
@@ -659,8 +672,8 @@ pub fn daemon_binary_candidates() -> Vec<PathBuf> {
     //    via the same code path.
     if let Ok(exe_path) = std::env::current_exe() {
         if let Some(exe_dir) = exe_path.parent() {
-            push_if_usable(&mut out, exe_dir.join(exe));
-            push_if_usable(&mut out, exe_dir.join(&exe_with_triple));
+            push_if_usable(&mut out, &mut diags, exe_dir.join(exe));
+            push_if_usable(&mut out, &mut diags, exe_dir.join(&exe_with_triple));
         }
     }
 
@@ -670,7 +683,11 @@ pub fn daemon_binary_candidates() -> Vec<PathBuf> {
     //     the *only* place the binary lives. Relative to the
     //     crate, so it works from any working directory.
     let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-    push_if_usable(&mut out, manifest.join("binaries").join(&exe_with_triple));
+    push_if_usable(
+        &mut out,
+        &mut diags,
+        manifest.join("binaries").join(&exe_with_triple),
+    );
 
     // 2 + 3. Explicit env-var overrides.
     for var in ["MYOWNLLM_MESH_BIN", "MYOWNMESH_BIN"] {
@@ -696,12 +713,21 @@ pub fn daemon_binary_candidates() -> Vec<PathBuf> {
     //    `manifest` already declared above for the `binaries/`
     //    lookup; reuse it here.
     for profile in ["debug", "release"] {
-        push_if_usable(&mut out, manifest.join("target").join(profile).join(exe));
+        push_if_usable(
+            &mut out,
+            &mut diags,
+            manifest.join("target").join(profile).join(exe),
+        );
         if let Some(parent) = manifest.parent() {
-            push_if_usable(&mut out, parent.join("target").join(profile).join(exe));
+            push_if_usable(
+                &mut out,
+                &mut diags,
+                parent.join("target").join(profile).join(exe),
+            );
             if let Some(grandparent) = parent.parent() {
                 push_if_usable(
                     &mut out,
+                    &mut diags,
                     grandparent
                         .join("MyOwnMesh")
                         .join("target")
@@ -719,7 +745,7 @@ pub fn daemon_binary_candidates() -> Vec<PathBuf> {
         let canonical = std::fs::canonicalize(p).unwrap_or_else(|_| p.clone());
         seen.insert(canonical)
     });
-    out
+    (out, diags)
 }
 
 /// Legacy single-binary lookup. Returns the highest-priority
@@ -786,8 +812,15 @@ pub async fn ensure_daemon_running() -> Result<(ControlClient, Option<DaemonChil
     //    skipped in favour of a working binary at the next. The
     //    diag log surfaces each attempt so a user with multiple
     //    stale candidates can see which one we picked.
-    let candidates = daemon_binary_candidates();
+    // `skip_diags` records every probed-but-rejected location. We stay
+    // quiet about them on the happy path and only surface them if the
+    // search ultimately fails, so a normal launch doesn't log a wall of
+    // "skipping ..." lines for paths that simply don't apply here.
+    let (candidates, skip_diags) = daemon_binary_candidates_diag();
     if candidates.is_empty() {
+        for d in &skip_diags {
+            eprintln!("daemon: {d}");
+        }
         return Err(anyhow!(
             "couldn't find a `myownmesh` binary. Re-run the LLM build with network \
              access so `build.rs` can fetch the daemon, set MYOWNLLM_MESH_BIN to a \
@@ -870,8 +903,14 @@ pub async fn ensure_daemon_running() -> Result<(ControlClient, Option<DaemonChil
         drop(handle);
     }
 
-    // Every candidate failed. Surface a diag listing all of them
-    // so the user can see what got tried — beats "couldn't find".
+    // Every candidate failed. Now that the search has definitively
+    // failed, surface the locations we skipped during enumeration too —
+    // they're part of the picture the user needs to debug it.
+    for d in &skip_diags {
+        eprintln!("daemon: {d}");
+    }
+    // Surface a diag listing everything tried so the user can see what
+    // got attempted — beats a bare "couldn't find".
     let tried: Vec<String> = candidates.iter().map(|p| p.display().to_string()).collect();
     Err(anyhow!(
         "no working `myownmesh` binary on this machine. Tried:\n  {}\nLast error: {}",
diff --git a/src-tauri/src/mesh/identity.rs b/src-tauri/src/mesh/identity.rs
index 7dea8e7..31dc1e7 100644
--- a/src-tauri/src/mesh/identity.rs
+++ b/src-tauri/src/mesh/identity.rs
@@ -17,5 +17,5 @@
 //! `myownmesh-core` and aren't duplicated here.
 
 pub use myownmesh_core::identity::{
-    generate_network_id, load_or_create, normalize_network_id, set_label, Identity,
+    generate_network_id, load_or_create, normalize_network_id, set_label,
 };
diff --git a/src-tauri/src/mesh/roster.rs b/src-tauri/src/mesh/roster.rs
index 7f29ba8..44a60be 100644
--- a/src-tauri/src/mesh/roster.rs
+++ b/src-tauri/src/mesh/roster.rs
@@ -23,8 +23,7 @@
 //! over.
 
 pub use myownmesh_core::roster::{
-    add_peer, add_peer_in, delete, empty_for, is_authorized, load, remove_peer, remove_peer_in,
-    save, AuthorizedPeer, Roster, ROSTER_VERSION,
+    add_peer, delete, load, remove_peer, save, AuthorizedPeer, Roster, ROSTER_VERSION,
 };
 
 /// One-shot migration from the pre-multi-network single roster file
diff --git a/src-tauri/src/mesh/signing.rs b/src-tauri/src/mesh/signing.rs
index 36066ec..6aaa009 100644
--- a/src-tauri/src/mesh/signing.rs
+++ b/src-tauri/src/mesh/signing.rs
@@ -8,4 +8,4 @@
 //! which reads `MYOWNMESH_HOME` — set to `~/.myownllm` in `main.rs`
 //! — so the local Device ID is unchanged.
 
-pub use myownmesh_core::signing::{pubkey_part, sign, verify};
+pub use myownmesh_core::signing::{sign, verify};
diff --git a/src-tauri/src/ollama.rs b/src-tauri/src/ollama.rs
index 1157782..c89bf65 100644
--- a/src-tauri/src/ollama.rs
+++ b/src-tauri/src/ollama.rs
@@ -96,6 +96,10 @@ fn ensure_macos_default_on_path() -> bool {
     false
 }
 
+// The trailing `Ok(())` is the Linux fall-through / unsupported-platform
+// fallback; on macOS and Windows the cfg blocks in the body always return
+// first, so it's unreachable there by design — silence the per-platform lint.
+#[allow(unreachable_code)]
 pub async fn install() -> Result<()> {
     #[cfg(target_os = "linux")]
     {
@@ -274,33 +278,108 @@ pub async fn ensure_running() -> Result<()> {
         return Ok(());
     }
 
-    // OLLAMA_ORIGINS=* belt-and-suspenders: when WE spawn the server (e.g. Linux
-    // or a fresh standalone Windows install), this lets the GUI fetch directly
-    // from `http://127.0.0.1:11434` without Ollama's CORS allowlist rejecting
-    // the WebView's `Origin` (which on Tauri 2 / Windows is `http://tauri.localhost`,
-    // not in Ollama's defaults). When the Windows installer runs Ollama as a
-    // tray service we can't influence its env — that's why the GUI also routes
-    // chat through myownllm's API server (see Chat.svelte).
-    let child = quiet_tokio_command("ollama")
-        .arg("serve")
-        .env("OLLAMA_ORIGINS", "*")
-        .stdout(Stdio::null())
-        .stderr(Stdio::null())
-        .spawn()
-        .context("failed to spawn ollama serve")?;
+    // Throttle the server at launch (Settings → Performance). Applying it
+    // as an argv prefix is the reliable lever — notably on macOS, where
+    // taskpolicy's IO policy only takes effect when launching a program,
+    // not via `-p` on a running PID (that gap left the throttle a no-op
+    // and let a load thrash the machine). "off" yields no prefix.
+    let mode = throttle_mode();
+    let prefix = crate::process::throttle_launch_prefix(&mode);
 
+    // Spawn under the wrapper when we have one. If the wrapper binary is
+    // missing the spawn itself errors — fall back to a plain spawn so a
+    // missing throttle tool can never leave the app without an LLM.
+    let child = match spawn_ollama_serve(prefix.as_deref()) {
+        Ok(c) => c,
+        Err(_) if prefix.is_some() => {
+            spawn_ollama_serve(None).context("failed to spawn ollama serve")?
+        }
+        Err(e) => return Err(anyhow::Error::new(e).context("failed to spawn ollama serve")),
+    };
     *guard = Some(child);
 
-    // Wait up to 10 seconds for API to become reachable.
+    // Windows throttles post-spawn (no reliable launch wrapper there);
+    // Unix already throttled via the argv prefix above.
+    #[cfg(target_os = "windows")]
+    if mode != "off" {
+        if let Some(pid) = guard.as_ref().and_then(|c| c.id()) {
+            crate::process::set_priority_windows(pid, &mode).await;
+        }
+    }
+
+    // Wait up to 10s for the API. Bail early if a launch-wrapper child
+    // exited without exec'ing ollama (e.g. an unsupported flag on this OS
+    // version): try_wait → Some means the wrapper failed, so we stop
+    // waiting and retry directly below rather than burning the full 10s.
+    let mut up = false;
     for _ in 0..20 {
         tokio::time::sleep(std::time::Duration::from_millis(500)).await;
         if api_reachable().await {
-            return Ok(());
+            up = true;
+            break;
+        }
+        if prefix.is_some() {
+            if let Some(c) = guard.as_mut() {
+                if matches!(c.try_wait(), Ok(Some(_))) {
+                    break; // wrapper died without bringing ollama up
+                }
+            }
+        }
+    }
+    if up {
+        return Ok(());
+    }
+
+    // Wrapped launch never came up — retry with a direct spawn so a
+    // broken/incompatible throttle tool can't disable the LLM entirely.
+    if prefix.is_some() {
+        if let Some(mut dead) = guard.take() {
+            let _ = dead.kill().await;
+        }
+        let direct = spawn_ollama_serve(None).context("failed to spawn ollama serve")?;
+        *guard = Some(direct);
+        for _ in 0..20 {
+            tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+            if api_reachable().await {
+                return Ok(());
+            }
         }
     }
     Err(anyhow!("ollama serve did not become reachable within 10s"))
 }
 
+/// Launches `ollama serve` with the app's standard environment and returns
+/// the spawned child.
+///
+/// When `prefix` is a non-empty argv (e.g. `["taskpolicy", "-d",
+/// "throttle"]`), its first element becomes the program and the rest its
+/// arguments, followed by `ollama serve`. Such wrapper tools exec their
+/// target, so the child PID is ollama itself and kill-on-exit (`stop`) is
+/// unaffected. `None` or an empty slice spawns `ollama serve` directly.
+///
+/// Environment: `OLLAMA_ORIGINS=*` is belt-and-suspenders so the GUI can
+/// fetch `http://127.0.0.1:11434` directly without Ollama's CORS allowlist
+/// rejecting the WebView's `Origin`. The two caps keep at most one model
+/// resident and serve one request at a time, so Ollama never holds two
+/// models (or N parallel KV caches) in RAM/VRAM — the swap-thrash source.
+fn spawn_ollama_serve(prefix: Option<&[&str]>) -> std::io::Result<tokio::process::Child> {
+    // A missing or empty prefix means a plain, unwrapped launch.
+    let mut cmd = if let Some((&wrapper, wrapper_args)) = prefix.and_then(|p| p.split_first()) {
+        let mut wrapped = quiet_tokio_command(wrapper);
+        wrapped.args(wrapper_args).arg("ollama");
+        wrapped
+    } else {
+        quiet_tokio_command("ollama")
+    };
+    cmd.arg("serve")
+        .env("OLLAMA_ORIGINS", "*")
+        .env("OLLAMA_MAX_LOADED_MODELS", "1")
+        .env("OLLAMA_NUM_PARALLEL", "1")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn()
+}
+
 async fn api_reachable() -> bool {
     reqwest_get("http://127.0.0.1:11434/").await.is_ok()
 }
@@ -595,14 +674,41 @@ pub async fn has_model(model: &str) -> Result<bool> {
     Ok(out.success())
 }
 
+/// True when `model` is currently loaded in Ollama's memory (per `/api/ps`),
+/// i.e. the next chat won't pay a cold load. Matches the `name` or `model`
+/// field, also trying the implicit `:latest` tag for an untagged request.
+/// Any error (Ollama down, bad body) yields `false` so callers show the dialog.
+pub async fn is_model_loaded(model: &str) -> bool {
+    let Ok(body) = reqwest_get("http://127.0.0.1:11434/api/ps").await else {
+        return false;
+    };
+    let Ok(v) = serde_json::from_str::<serde_json::Value>(&body) else {
+        return false;
+    };
+    // Only the last path segment can carry a tag (a registry host may have a port).
+    let untagged = !model.rsplit('/').next().unwrap_or(model).contains(':');
+    let with_default_tag = untagged.then(|| format!("{model}:latest"));
+    let hit = |n: &str| n == model || with_default_tag.as_deref() == Some(n);
+    v.get("models").and_then(|m| m.as_array()).is_some_and(|arr| {
+        arr.iter().any(|e| {
+            ["name", "model"]
+                .into_iter()
+                .any(|k| e.get(k).and_then(|n| n.as_str()).is_some_and(&hit))
+        })
+    })
+}
+
 /// Fire a 1-token chat call so Ollama mmaps the weights and keeps the model loaded
-/// for `keep_alive`. Used by `myownllm preload --warm`.
+/// for `keep_alive`. Used by `myownllm preload --warm` and the startup warm.
+/// Honors the user's configured `keep_alive` so a proactive warm respects
+/// the same residency window as real chats (warming with "0" would just
+/// load-then-unload, so callers skip warming in that case).
 pub async fn warm(model: &str) -> Result<()> {
     let body = serde_json::json!({
         "model": model,
         "messages": [{"role": "user", "content": "ok"}],
         "stream": false,
-        "keep_alive": "10m",
+        "keep_alive": chat_keep_alive(),
         "options": { "num_predict": 1 }
     })
     .to_string();
@@ -730,6 +836,44 @@ pub enum ChatStreamOutcome {
     Cancelled,
 }
 
+/// Resolve the user's configured Ollama `keep_alive` for chat requests: how
+/// long the model stays resident in RAM/VRAM after a turn. Longer values
+/// avoid cold-start reloads between messages; shorter ones free memory sooner
+/// (e.g. for transcription). Unit durations ("30m", "1h") are sent as strings.
+/// Bare integers — "0" (unload now), "-1" (keep until evicted), or a JSON
+/// number — are sent as numeric seconds, because Ollama rejects a unit-less
+/// string such as "-1". Falls back to "30m" if unreadable, absent, or invalid.
+fn chat_keep_alive() -> serde_json::Value {
+    let configured = crate::resolver::load_config_value()
+        .ok()
+        .and_then(|c| c.get("ollama_keep_alive").cloned());
+    match configured {
+        Some(serde_json::Value::String(s)) => match s.trim().parse::<i64>() {
+            Ok(secs) => serde_json::json!(secs),
+            Err(_) => serde_json::Value::String(s),
+        },
+        Some(n) if n.is_i64() => n,
+        _ => serde_json::json!("30m"),
+    }
+}
+
+/// Reads the `ollama_throttle` setting: how hard the Ollama server we spawn
+/// has its priority eased so model loading doesn't starve the desktop.
+/// Accepted values are "off" (no throttle), "io" (the balanced default) and
+/// "aggressive" (deepest throttle: most responsive machine, slower
+/// inference). An unreadable config, a missing or non-string key, or any
+/// other word yields "io".
+fn throttle_mode() -> String {
+    const FALLBACK: &str = "io";
+    let Ok(config) = crate::resolver::load_config_value() else {
+        return FALLBACK.to_string();
+    };
+    match config.get("ollama_throttle").and_then(|v| v.as_str()) {
+        Some(mode @ ("off" | "io" | "aggressive")) => mode.to_string(),
+        _ => FALLBACK.to_string(),
+    }
+}
+
 /// Streamed chat completion. Invokes `on_content` for each visible token
 /// chunk, `on_thinking` for any reasoning/thinking deltas (thinking models
 /// emit those in `message.thinking`; non-thinking models never call it),
@@ -771,6 +915,7 @@ where
         "model": model,
         "messages": messages,
         "stream": true,
+        "keep_alive": chat_keep_alive(),
     });
     if let Some(t) = think {
         body["think"] = serde_json::json!(t);
@@ -933,6 +1078,7 @@ pub async fn chat_once(
         "model": model,
         "messages": messages,
         "stream": false,
+        "keep_alive": chat_keep_alive(),
     });
     if let Some(opts) = options {
         body["options"] = opts;
diff --git a/src-tauri/src/process.rs b/src-tauri/src/process.rs
index 9d1eeab..7e84356 100644
--- a/src-tauri/src/process.rs
+++ b/src-tauri/src/process.rs
@@ -41,8 +41,93 @@ fn apply_quiet_flags(_cmd: &mut std::process::Command) {}
 
 #[cfg(target_os = "windows")]
 fn apply_quiet_flags_tokio(cmd: &mut tokio::process::Command) {
-    use std::os::windows::process::CommandExt;
+    // tokio's Command exposes `creation_flags` inherently — no CommandExt
+    // trait import needed (unlike the std::process variant above).
     cmd.creation_flags(CREATE_NO_WINDOW);
 }
 #[cfg(not(target_os = "windows"))]
 fn apply_quiet_flags_tokio(_cmd: &mut tokio::process::Command) {}
+
+/// Builds the launch-time throttle wrapper for the LLM server as an argv
+/// prefix to put in front of `ollama serve`, chosen by the user's `mode`.
+///
+/// Why a prefix rather than adjusting the PID after spawn: on macOS,
+/// `taskpolicy`'s disk-IO policy only takes effect when *launching* a
+/// program, not via `-p` on a running one. The post-spawn approach was a
+/// silent no-op that left the server unthrottled and let a model load
+/// thrash the whole machine. The wrapper tools (`ionice`/`nice`/
+/// `taskpolicy`) exec their target, so the resulting child PID is still
+/// ollama and kill-on-exit handling is unaffected.
+///
+/// Modes:
+/// - `"off"`: returns `None` — no wrapper on any platform.
+/// - `"aggressive"`: deep `nice` plus the idle IO class on Linux, background
+///   QoS on macOS. Most responsive desktop, but noticeably slower inference.
+/// - anything else, including the default `"io"` ("balanced"): a moderate
+///   `nice`, plus a low best-effort disk-IO class on Linux. `nice` only
+///   makes the server yield when something else (display server,
+///   networking, the WebView) wants the CPU, so the machine stays
+///   responsive while inference keeps the bulk of the cores when nothing
+///   competes. It does not force the server onto efficiency cores the way
+///   background QoS does, so inference isn't crippled.
+///
+/// Always `None` on targets other than Linux and macOS; Windows throttles
+/// post-spawn via [`set_priority_windows`] instead.
+pub fn throttle_launch_prefix(mode: &str) -> Option<Vec<&'static str>> {
+    let deep = match mode {
+        "off" => return None,
+        other => other == "aggressive",
+    };
+    #[cfg(target_os = "linux")]
+    {
+        let argv: &[&'static str] = if deep {
+            // Max nice + idle IO class: the server only runs when nothing
+            // else wants CPU or disk. Snappiest desktop, slowest model.
+            &["nice", "-n", "19", "ionice", "-c", "3"]
+        } else {
+            // Moderate nice leaves the system headroom; low best-effort IO
+            // makes disk reads yield under contention. Uncontended, the
+            // server still gets most of the CPU, so inference stays fast.
+            &["nice", "-n", "10", "ionice", "-c", "2", "-n", "7"]
+        };
+        Some(argv.to_vec())
+    }
+    #[cfg(target_os = "macos")]
+    {
+        let argv: &[&'static str] = if deep {
+            // Background QoS: efficiency cores + throttled compute & IO.
+            // Frees the machine most, but slows inference.
+            &["taskpolicy", "-b"]
+        } else {
+            // Moderate nice only: keeps CPU headroom for the system while
+            // leaving the server on the performance cores. `nice` is POSIX
+            // and always present, so this can't fail the launch.
+            &["nice", "-n", "10"]
+        };
+        Some(argv.to_vec())
+    }
+    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
+    {
+        let _ = deep; // consumed on platforms without a launch wrapper
+        None
+    }
+}
+
+/// Windows-only: eases the spawned server's priority class after spawn by
+/// running a PowerShell one-liner against `pid`. Windows has no launch-time
+/// IO-throttle wrapper we can rely on and no external per-process IO
+/// priority without FFI, so the priority class is nudged instead:
+/// `Idle` when `mode` is "aggressive", `BelowNormal` for anything else.
+/// Best-effort; failure just means no throttle.
+#[cfg(target_os = "windows")]
+pub async fn set_priority_windows(pid: u32, mode: &str) {
+    let class = match mode {
+        "aggressive" => "Idle",
+        _ => "BelowNormal",
+    };
+    let script = format!("(Get-Process -Id {pid}).PriorityClass='{class}'");
+    let mut powershell = quiet_tokio_command("powershell");
+    powershell.args(["-NoProfile", "-NonInteractive", "-Command", script.as_str()]);
+    // Best-effort: the exit status is deliberately discarded.
+    let _ = powershell.status().await;
+}
diff --git a/src-tauri/src/resolver.rs b/src-tauri/src/resolver.rs
index 9a0e92e..84d53f4 100644
--- a/src-tauri/src/resolver.rs
+++ b/src-tauri/src/resolver.rs
@@ -783,6 +783,9 @@ pub fn default_config_value() -> Value {
         "active_family": "gemma4",
         "active_mode": "text",
         "model_cleanup_days": 1,
+        "ollama_keep_alive": "30m",
+        "ollama_throttle": "io",
+        "warm_on_startup": true,
         "kept_models": [],
         "mode_overrides": {},
         "tracked_modes": ["text"],
diff --git a/src-tauri/src/transcribe.rs b/src-tauri/src/transcribe.rs
index 4dba775..93a8f65 100644
--- a/src-tauri/src/transcribe.rs
+++ b/src-tauri/src/transcribe.rs
@@ -1409,8 +1409,7 @@ fn run_session(
                         count_pending_chunks(&buffer_dir),
                         None,
                         Some(format!(
-                            "ASR inference error ({}/{}): {e:#}",
-                            consecutive_errors, ASR_CONSECUTIVE_ERROR_LIMIT
+                            "ASR inference error ({consecutive_errors}/{ASR_CONSECUTIVE_ERROR_LIMIT}): {e:#}"
                         )),
                     ),
                 );
@@ -2128,8 +2127,7 @@ fn ingest_loop(
                         count_pending_chunks(&buffer_dir),
                         None,
                         Some(format!(
-                            "Backlog full ({:.0} s); dropping oldest chunk to stay live.",
-                            MAX_BACKLOG_SECONDS
+                            "Backlog full ({MAX_BACKLOG_SECONDS:.0} s); dropping oldest chunk to stay live."
                         )),
                     ),
                 );
diff --git a/src-tauri/src/usage.rs b/src-tauri/src/usage.rs
index 781ef9a..ba93fa5 100644
--- a/src-tauri/src/usage.rs
+++ b/src-tauri/src/usage.rs
@@ -281,6 +281,67 @@ fn cpu_count() -> Option<u32> {
         .map(|n| n.get() as u32)
 }
 
+/// Sum a `ps -A -o %cpu=` dump (one value per process, each a share of
+/// one core) and normalise by `cpus` (non-positive → 1) into a 0..100
+/// share of total CPU. Tokens that don't parse to a finite number are
+/// skipped; `None` when none do. Pure, so any host can unit-test it.
+#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
+fn parse_total_cpu_pct(ps_output: &str, cpus: f64) -> Option<f64> {
+    let cpus = if cpus <= 0.0 { 1.0 } else { cpus };
+    let (mut sum, mut any) = (0.0, false);
+    let samples = ps_output
+        .split_whitespace()
+        // A decimal comma (some LC_NUMERIC locales) is read as '.'.
+        .filter_map(|tok| tok.replace(',', ".").parse::<f64>().ok())
+        // "nan"/"inf" parse as f64 but would poison the sum.
+        .filter(|v| v.is_finite());
+    for v in samples {
+        sum += v;
+        any = true;
+    }
+    any.then(|| (sum / cpus).clamp(0.0, 100.0))
+}
+
+/// Pull a page count out of a `vm_stat` line such as
+/// `Pages active:    123456.`. The first line starting with `key` that
+/// has any ASCII digits after it wins; only those digits are parsed, so
+/// the trailing '.' is dropped.
+#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
+fn parse_vm_stat_pages(vm_stat: &str, key: &str) -> Option<u64> {
+    let first_count = vm_stat
+        .lines()
+        .filter_map(|line| line.trim_start().strip_prefix(key))
+        .map(|tail| tail.chars().filter(char::is_ascii_digit).collect::<String>())
+        .find(|digits| !digits.is_empty())?;
+    // A count too large for u64 yields `None`; later lines are not tried.
+    first_count.parse::<u64>().ok()
+}
+
+/// Read the page size from the first line of a `vm_stat` dump, e.g.
+/// "Mach Virtual Memory Statistics: (page size of 16384 bytes)". Taking
+/// it from the dump itself keeps byte math consistent with the dump's
+/// page counts (16 KiB on Apple Silicon, 4 KiB on Intel; `hw.pagesize`
+/// doesn't always agree with the VM page size). Zero is rejected.
+#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
+fn parse_vm_stat_page_size(vm_stat: &str) -> Option<u64> {
+    let header = vm_stat.lines().next()?;
+    let size_token = header.split("page size of").nth(1)?.split_whitespace().next()?;
+    let digits = size_token.chars().filter(char::is_ascii_digit).collect::<String>();
+    let bytes = digits.parse::<u64>().ok()?;
+    (bytes > 0).then_some(bytes)
+}
+
+/// macOS system "used" memory ≈ (active + wired + compressed) pages × page
+/// size, as Activity Monitor's "Memory Used". `None` if a line is absent.
+#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
+fn parse_vm_stat_used_bytes(vm_stat: &str, page_bytes: u64) -> Option<u64> {
+    let active = parse_vm_stat_pages(vm_stat, "Pages active:")?;
+    let wired = parse_vm_stat_pages(vm_stat, "Pages wired down:")?;
+    let compressed = parse_vm_stat_pages(vm_stat, "Pages occupied by compressor:")?;
+    let pages = active.saturating_add(wired).saturating_add(compressed); // never panics
+    Some(pages.saturating_mul(page_bytes))
+}
+
 #[cfg(target_os = "linux")]
 fn cpu_brand() -> Option<String> {
     let content = std::fs::read_to_string("/proc/cpuinfo").ok()?;
@@ -450,9 +511,20 @@ fn sample_cpu() -> (Option<f64>, Option<f64>) {
         .and_then(|b| String::from_utf8(b).ok())
         .and_then(|s| s.trim().parse::<f64>().ok())
         .map(|v| (v / cpus).clamp(0.0, 100.0));
-    // Total system CPU% on macOS would need host_statistics — skip and
-    // leave as None. The UI handles the missing value cleanly.
-    (app_pct, None)
+    // System CPU%: sum every process's ps %cpu (each a share of one
+    // core) and normalise by core count. ps reports a decaying average
+    // rather than a true instant, but it's a single fast call — no
+    // host_statistics FFI and no `top -l 2` second-sample stall that
+    // would block the poll — and tracks "is the machine busy" well
+    // enough for the load readout.
+    let total_pct = quiet_command("ps")
+        .args(["-A", "-o", "%cpu="])
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .and_then(|o| String::from_utf8(o.stdout).ok())
+        .and_then(|s| parse_total_cpu_pct(&s, cpus));
+    (app_pct, total_pct)
 }
 
 #[cfg(target_os = "windows")]
@@ -541,7 +613,6 @@ fn win_process_cpu_seconds() -> Option<f64> {
 #[cfg(target_os = "windows")]
 fn win_total_cpu_times() -> Option<(u64, u64)> {
     // (idle ticks, total ticks). Returns 100ns ticks summed across cores.
-    use std::ffi::c_void;
     type Bool = i32;
     #[repr(C)]
     struct Filetime {
@@ -635,9 +706,30 @@ fn sample_ram() -> (Option<u64>, Option<u64>, Option<u64>) {
         })
         .and_then(|b| String::from_utf8(b).ok())
         .and_then(|s| s.trim().parse::<u64>().ok());
-    // System "used" via `vm_stat` page-counting is fiddly — leave as None;
-    // the UI handles missing values.
-    (app, total, None)
+    // System "used" ≈ (active + wired + compressed) pages × page size,
+    // the components Activity Monitor sums as "Memory Used". Page size
+    // differs by arch (16 KiB on Apple Silicon, 4 KiB on Intel), so
+    // read it rather than assume.
+    let page = quiet_command("sysctl")
+        .args(["-n", "hw.pagesize"])
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .and_then(|o| String::from_utf8(o.stdout).ok())
+        .and_then(|s| s.trim().parse::<u64>().ok())
+        .unwrap_or(4096);
+    let used = quiet_command("vm_stat")
+        .output()
+        .ok()
+        .filter(|o| o.status.success())
+        .and_then(|o| String::from_utf8(o.stdout).ok())
+        .and_then(|s| {
+            // Prefer vm_stat's own header page size; fall back to the
+            // sysctl value, then a 4 KiB default.
+            let page = parse_vm_stat_page_size(&s).unwrap_or(page);
+            parse_vm_stat_used_bytes(&s, page)
+        });
+    (app, total, used)
 }
 
 #[cfg(target_os = "windows")]
@@ -829,3 +921,56 @@ fn nvidia_app_vram_bytes() -> Option<u64> {
     }
     None
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn total_cpu_pct_sums_and_normalises() {
+        // Four processes at 50% of one core each on a 4-core box: 200 / 4 = 50%.
+        let out = "50.0\n50.0\n50.0\n50.0\n";
+        let pct = parse_total_cpu_pct(out, 4.0).unwrap();
+        assert!((pct - 50.0).abs() < 1e-6, "got {pct}");
+    }
+
+    #[test]
+    fn total_cpu_pct_handles_blanks_and_clamps() {
+        assert_eq!(parse_total_cpu_pct("", 4.0), None);
+        assert_eq!(parse_total_cpu_pct("   \n  \n", 4.0), None);
+        // 800 / 4 = 200% overshoots (transient ps quirk) and clamps to 100.
+        assert_eq!(parse_total_cpu_pct("800.0\n", 4.0).unwrap(), 100.0);
+        // A non-positive cpu count is treated as 1, not a divide-by-zero.
+        assert_eq!(parse_total_cpu_pct("10.0\n", 0.0).unwrap(), 10.0);
+    }
+
+    #[test]
+    fn vm_stat_used_sums_active_wired_compressed() {
+        let vm = "Mach Virtual Memory Statistics: (page size of 16384 bytes)\n\
+Pages free:                          100000.\n\
+Pages active:                        200000.\n\
+Pages inactive:                      150000.\n\
+Pages speculative:                     5000.\n\
+Pages wired down:                    100000.\n\
+Pages occupied by compressor:         50000.\n";
+        // (200000 + 100000 + 50000) pages × 16384 bytes; other lines are ignored.
+        let used = parse_vm_stat_used_bytes(vm, 16384).unwrap();
+        assert_eq!(used, 350_000u64 * 16384);
+    }
+
+    #[test]
+    fn vm_stat_page_size_parsed_from_header() {
+        let vm = "Mach Virtual Memory Statistics: (page size of 16384 bytes)\n\
+Pages active: 1.\n";
+        assert_eq!(parse_vm_stat_page_size(vm), Some(16384));
+        assert_eq!(parse_vm_stat_page_size("no header here"), None);
+    }
+
+    #[test]
+    fn vm_stat_used_none_when_a_component_missing() {
+        // Has active + wired but no compressor line → None, not a partial sum.
+        let vm = "Pages active: 10.\nPages wired down: 20.\n";
+        assert_eq!(parse_vm_stat_used_bytes(vm, 4096), None);
+        assert_eq!(parse_vm_stat_used_bytes("garbage", 4096), None);
+    }
+}
diff --git a/src/config.ts b/src/config.ts
index b7bda88..43235c6 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -111,6 +111,9 @@ const DEFAULT_CONFIG: Config = {
   // active_mode they persisted (mergeDefaults overlays raw on top).
   active_mode: "transcribe",
   model_cleanup_days: 1,
+  ollama_keep_alive: "30m",
+  ollama_throttle: "io",
+  warm_on_startup: true,
   cleanup_warning_suppressed_families: [],
   kept_models: [],
   mode_overrides: {},
diff --git a/src/types.ts b/src/types.ts
index 77d2fe9..ed92052 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -11,6 +11,27 @@ export interface HardwareProfile {
   soc?: string | null;
 }
 
+/** Live resource snapshot returned by the `usage_live_snapshot` Tauri
+ *  command; mirrors the `LiveSnapshot` struct in
+ *  src-tauri/src/usage.rs. Counters are `null` when a platform doesn't
+ *  expose them, and the UI then renders "—". Shared between the Usage
+ *  settings tab and the chat model-loading dialog so both read the
+ *  same system lookups. */
+export interface LiveSnapshot {
+  cpu_app_pct: number | null;
+  cpu_total_pct: number | null;
+  ram_app_bytes: number | null;
+  ram_total_bytes: number | null;
+  ram_used_bytes: number | null;
+  gpu_pct: number | null;
+  vram_app_bytes: number | null;
+  vram_used_bytes: number | null;
+  vram_total_bytes: number | null;
+  process_uptime_seconds: number;
+  cpu_brand: string | null;
+  cpu_count: number | null;
+}
+
 export type Mode = "text" | "vision" | "code" | "transcribe" | "diarize";
 
 /** Runtimes the resolver knows how to dispatch to.
@@ -469,6 +490,25 @@ export interface Config {
   active_family: string;
   active_mode: Mode;
   model_cleanup_days: number;
+  /** Ollama `keep_alive` for chat requests — how long the model stays
+   *  resident in RAM/VRAM after a turn before Ollama unloads it.
+   *  Native Ollama duration format: "30m", "1h", "0" (unload right
+   *  away, frees memory for transcription), "-1" (keep until evicted).
+   *  Longer values avoid cold-start reloads between messages; shorter
+   *  values suit memory-tight machines. Default "30m". */
+  ollama_keep_alive: string;
+  /** How hard to throttle the Ollama server we spawn so a model load
+   *  doesn't freeze the machine. "off" = no throttle; "io" (default) =
+   *  moderate priority drop (`nice`, plus best-effort disk IO on Linux),
+   *  inference stays near full speed; "aggressive" = deep demotion (most
+   *  responsive desktop, slower inference). Only applies when MyOwnLLM
+   *  spawns Ollama itself — not when it's a system/tray service. */
+  ollama_throttle: "off" | "io" | "aggressive";
+  /** Warm (preload) the active chat model in the background at startup so
+   *  the first message doesn't pay the cold-load wait. On by default; the
+   *  load runs under the configured throttle so it doesn't lock up the
+   *  machine. Can be turned off in Settings → Performance. */
+  warm_on_startup: boolean;
   /** Family names for which the user has dismissed the
    *  "switching with auto-cleanup on" confirmation in the family
    *  detail view's per-tier picker. Per-family rather than per-tier
diff --git a/src/ui/App.svelte b/src/ui/App.svelte
index acf7ef7..75f6a26 100644
--- a/src/ui/App.svelte
+++ b/src/ui/App.svelte
@@ -7,6 +7,7 @@
   import Chat from "./Chat.svelte";
   import TranscribeView from "./TranscribeView.svelte";
   import Sidebar from "./Sidebar.svelte";
+  import LoadingPulse from "./LoadingPulse.svelte";
   import PermissionPromptModal from "./PermissionPromptModal.svelte";
   import { loadConfig, updateConfig } from "../config";
   import { getActiveManifest } from "../providers";
@@ -83,6 +84,12 @@
   type View = "loading" | "chat";
 
   let view = $state<View>("loading");
+  /** True while the startup warm is in flight: we keep a full-screen
+   *  loading screen up (rather than dropping into a chat that feels
+   *  sluggish because it's competing with the cold load) until the model
+   *  is resident. The chat still mounts behind it, so it's fully ready
+   *  when the screen lifts. A "Continue" button is the escape hatch. */
+  let warming = $state(false);
   let appVersion = $state("");
   let hardware = $state<HardwareProfile | null>(null);
   let activeModel = $state("");
@@ -276,6 +283,31 @@
       // run the install lazily).
       view = "chat";
       invoke("ollama_ensure_running").catch(() => {});
+
+      // Warm the chat model so the first message doesn't pay the cold-load
+      // wait. On by default; the load runs under the configured throttle
+      // (Settings → Performance). Skipped when the user turned it off, when
+      // the model isn't on disk yet (the download overlay owns that), or
+      // when keep_alive is "0" (warming would just load-then-unload).
+      //
+      // We hold a full-screen loading screen (`warming`) over the chat —
+      // which keeps mounting/initializing behind it — until the warm
+      // settles, so the user lands on a chat that's actually ready instead
+      // of one that feels sluggish while it competes with the cold load.
+      if (
+        config.warm_on_startup !== false &&
+        pendingTextModel &&
+        !textModelMissing &&
+        config.ollama_keep_alive !== "0"
+      ) {
+        warming = true;
+        invoke("ollama_warm", { model: pendingTextModel })
+          .catch(() => {})
+          .finally(() => {
+            warming = false;
+          });
+      }
+
       kickUpdateCheck();
 
       // Seed the sidebar early so it's ready when the chat view paints.
@@ -1143,6 +1175,23 @@
        the same modal; the modal self-hides when the prompt queue
        drains. -->
   <PermissionPromptModal />
+
+  {#if warming}
+    <!-- Startup warm: keep a loading screen up until the model is
+         resident. The chat mounts behind this, so it's ready the moment
+         the screen lifts. Same shining word + live CPU/RAM as the in-chat
+         indicator, under the spinner. Continue is the escape hatch. -->
+    <div class="warming-overlay">
+      <div class="spinner"></div>
+      <LoadingPulse showStats={true} />
+      <button class="warming-skip" onclick={() => (warming = false)}>
+        Continue to chat →
+      </button>
+      {#if appVersion}
+        <p class="splash-version">v{appVersion}</p>
+      {/if}
+    </div>
+  {/if}
 </div>
 
 <style>
@@ -1317,6 +1366,35 @@
     color: #555;
     margin-top: -0.5rem;
   }
+  /* Startup-warm loading screen. Full-screen, opaque, same look as the
+     initial splash (spinner on top, LoadingPulse just beneath). */
+  .warming-overlay {
+    position: fixed;
+    inset: 0;
+    z-index: 55;
+    background: #111;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    gap: 1rem;
+    color: #888;
+  }
+  .warming-skip {
+    margin-top: 0.25rem;
+    background: none;
+    border: 1px solid #2a2a2a;
+    color: #888;
+    padding: 0.4rem 0.8rem;
+    border-radius: 6px;
+    font-size: 0.8rem;
+    cursor: pointer;
+    transition: color 0.12s, border-color 0.12s;
+  }
+  .warming-skip:hover {
+    color: #ccc;
+    border-color: #3a3a55;
+  }
   .spinner {
     width: 28px;
     height: 28px;
diff --git a/src/ui/Chat.svelte b/src/ui/Chat.svelte
index afd4856..4e99b3a 100644
--- a/src/ui/Chat.svelte
+++ b/src/ui/Chat.svelte
@@ -1,9 +1,11 @@
 <script lang="ts">
   import { invoke } from "@tauri-apps/api/core";
+  import { tick } from "svelte";
   import TopBar from "./TopBar.svelte";
   import TextBar from "./TextBar.svelte";
   import SettingsPanel from "./SettingsPanel.svelte";
   import DownloadOverlay from "./DownloadOverlay.svelte";
+  import LoadingPulse from "./LoadingPulse.svelte";
   import {
     loadConversation,
     saveConversation,
@@ -120,6 +122,45 @@
   let input = $state("");
   let streaming = $state(false);
 
+  /** Cold-start UX: Ollama loads a model's weights into RAM/VRAM on
+   *  the first request after it was evicted (or never loaded this
+   *  session). That gap — between firing the chat stream and the
+   *  first token coming back — is otherwise silent. When it runs long
+   *  we swap the typing dots for a `LoadingPulse` (rotating reassurance
+   *  word + live CPU/RAM) so the user knows the app isn't wedged. Once
+   *  any frame arrives (delta, tool call, or terminal event) the model
+   *  is resident and we clear it; we don't re-arm for later turns in
+   *  the same run since the model is warm by then. The indicator owns
+   *  its own word rotation + usage poll, mounting/unmounting with
+   *  `modelLoading`. */
+  const MODEL_LOAD_POPUP_DELAY_MS = 5000;
+  let modelLoading = $state(false);
+  let modelLoadTimer: ReturnType<typeof setTimeout> | null = null;
+
+  /** Tear down the load indicator and cancel its pending arm timer.
+   *  Idempotent — safe from every agent event and from cleanup. */
+  function clearModelLoadWait() {
+    const pending = modelLoadTimer;
+    modelLoadTimer = null;
+    if (pending !== null) clearTimeout(pending);
+    // Guarded write: only touch the state when the indicator is showing.
+    if (modelLoading) modelLoading = false;
+  }
+
+  /** Resolve once the browser has had a chance to paint: a Svelte tick
+   *  flushes the DOM update, then two animation frames let the compositor
+   *  draw it. Awaited after showing the indicator and before a cold model
+   *  load, which can thrash a laptop badly enough that an un-painted
+   *  indicator never appears. Capped by a timer: rAF stalls when hidden. */
+  function nextPaint(): Promise<void> {
+    return tick().then(
+      () => new Promise<void>((resolve) => {
+        requestAnimationFrame(() => requestAnimationFrame(() => resolve()));
+        setTimeout(resolve, 250); // fallback so a hidden window can't block the send
+      }),
+    );
+  }
+
   /** One pending attachment staged for the next send. Images become
    *  Ollama-style `images: [base64]` array entries on the user
    *  message; text-like files (JSON, configs, source, plain text)
@@ -838,6 +879,29 @@
       enabledToolSet.has(t.definition.function.name),
     );
 
+    // Cold-start dialog. A model that isn't resident yet can thrash a
+    // laptop hard enough that a reactively-delayed dialog never gets to
+    // paint — so when we can confirm (via Ollama's /api/ps) that the
+    // model isn't loaded, we show the dialog and force a paint BEFORE
+    // firing the request. For the warm case, the remote path, or an
+    // unknown ps result, we keep the lightweight 5s reactive timer.
+    let coldStart = false;
+    if (!routeViaDevicePubkey) {
+      try {
+        coldStart = !(await invoke<boolean>("ollama_model_loaded", { model: activeModel }));
+      } catch {
+        // ps unavailable — fall back to the reactive timer below.
+      }
+    }
+    if (coldStart) {
+      modelLoading = true;
+      await nextPaint(); // get the indicator on screen before the load freeze
+    } else {
+      modelLoadTimer = setTimeout(() => {
+        modelLoading = true;
+      }, MODEL_LOAD_POPUP_DELAY_MS);
+    }
+
     try {
       await runAgent({
         messages: working,
@@ -849,6 +913,9 @@
         viaDevicePubkey: routeViaDevicePubkey,
         signal: controller.signal,
         onEvent: (event: AgentEvent) => {
+          // Any frame means the model is resident and producing —
+          // tear down the load-wait dialog (idempotent).
+          clearModelLoadWait();
           switch (event.kind) {
             case "assistant_delta":
             case "thinking_delta": {
@@ -916,6 +983,9 @@
       streaming = false;
       agentAbortController = null;
       inFlightToolCallIds = new Set();
+      // Belt-and-suspenders: if the run ended before any frame (error
+      // thrown, instant cancel), the timer/dialog could still be live.
+      clearModelLoadWait();
       // Drop the streaming flag on any straggler bubble so its
       // <details> can collapse cleanly once the answer is in.
       if (liveIdx !== -1 && liveIdx < messages.length) {
@@ -958,6 +1028,9 @@
     // `infer_cancel` (mesh path) on whatever turn is in flight, then
     // unwinds the loop without starting another round.
     agentAbortController?.abort();
+    // Drop the load-wait dialog right away rather than waiting for the
+    // stream to unwind through the finally block.
+    clearModelLoadWait();
   }
 
   function onKeydown(e: KeyboardEvent) {
@@ -1258,7 +1331,18 @@
     {/each}
     {#if streaming && (messages.length === 0 || messages[messages.length - 1].role !== "assistant")}
       <div class="message assistant">
-        <div class="bubble"><span class="dots"><span></span><span></span><span></span></span></div>
+        <div class="bubble">
+          {#if modelLoading}
+            <!-- Cold-start (or a long-running call): the model is loading
+                 / still working. Replace the typing dots in place (no
+                 jolting modal) with the calmer LoadingPulse — a rotating
+                 reassurance word + live CPU/RAM. Stats are hidden for the
+                 remote path (the load is on the host's machine). -->
+            <LoadingPulse showStats={!routeViaDevicePubkey} />
+          {:else}
+            <span class="dots"><span></span><span></span><span></span></span>
+          {/if}
+        </div>
       </div>
     {/if}
   </div>
@@ -1416,6 +1500,7 @@
     flex-direction: column;
     position: relative;
   }
+
   .chat-body {
     flex: 1;
     min-height: 0;
diff --git a/src/ui/LoadingPulse.svelte b/src/ui/LoadingPulse.svelte
new file mode 100644
index 0000000..cbbf92d
--- /dev/null
+++ b/src/ui/LoadingPulse.svelte
@@ -0,0 +1,117 @@
+<script lang="ts">
+  // Calm "still working, not frozen" indicator: a reassurance word that
+  // rotates every few seconds with a moving shine, plus an optional quiet
+  // live CPU/RAM line as proof of life. Self-contained — it owns its word
+  // rotation and (when showStats) its usage poll — so it can be dropped in
+  // both the in-chat cold-start bubble and the startup warming screen.
+  import { onMount, onDestroy } from "svelte";
+  import { invoke } from "@tauri-apps/api/core";
+  import type { LiveSnapshot } from "../types";
+
+  let { showStats = true }: { showStats?: boolean } = $props();
+
+  // Deliberately ambiguous about *what's* happening: this same indicator
+  // covers both a cold model load and a slow in-progress turn, so phrases
+  // like "Loading the model…" would wrongly suggest a reload mid-chat.
+  // These just reassure that work is underway, whatever the cause.
+  const WORDS = [
+    "Working on it…",
+    "Thinking it through…",
+    "Crunching…",
+    "Hang tight…",
+    "Still working…",
+    "Just a moment…",
+    "Almost there…",
+  ];
+  const WORD_MS = 3000;
+  const STATS_POLL_MS = 1200;
+
+  let wordIdx = $state(0);
+  let live = $state<LiveSnapshot | null>(null);
+  let wordTimer: ReturnType<typeof setInterval> | null = null;
+  let statsTimer: ReturnType<typeof setInterval> | null = null;
+
+  function fmtGb(bytes: number | null | undefined): string {
+    const gib = bytes == null ? null : bytes / 1024 ** 3;
+    return gib === null ? "—" : `${gib.toFixed(1)} GB`;
+  }
+
+  async function refresh() {
+    const fetched = await invoke<LiveSnapshot>("usage_live_snapshot").then(
+      (snapshot) => ({ snapshot }),
+      () => null, // non-fatal: the word still rotates without the resource line
+    );
+    if (fetched) live = fetched.snapshot;
+  }
+
+  onMount(() => {
+    const rotate = () => {
+      wordIdx = (wordIdx + 1) % WORDS.length;
+    };
+    wordTimer = setInterval(rotate, WORD_MS);
+    if (!showStats) return;
+    void refresh(); // prime the CPU delta cache immediately
+    statsTimer = setInterval(() => void refresh(), STATS_POLL_MS);
+  });
+
+  onDestroy(() => {
+    const timers = [wordTimer, statsTimer];
+    timers.forEach((timer) => timer && clearInterval(timer));
+  });
+</script>
+
+<div class="loading-inline" aria-live="polite">
+  {#key wordIdx}
+    <span class="loading-word">{WORDS[wordIdx]}</span>
+  {/key}
+  {#if showStats && live}
+    <span class="loading-meta"><!-- "·" only separates CPU from RAM when both show -->
+      {#if live.cpu_total_pct != null}CPU {Math.round(live.cpu_total_pct)}%{/if}
+      {#if live.ram_used_bytes != null && live.ram_total_bytes != null}
+        {#if live.cpu_total_pct != null}· {/if}RAM {fmtGb(live.ram_used_bytes)}/{fmtGb(live.ram_total_bytes)}
+      {/if}
+    </span>
+  {/if}
+</div>
+
+<style>
+  .loading-inline {
+    display: flex;
+    flex-direction: column;
+    gap: 0.25rem;
+  }
+  .loading-word {
+    display: inline-block;
+    font-size: 0.9rem;
+    font-weight: 500;
+    background: linear-gradient(
+      90deg,
+      #8a8a8a 0%,
+      #8a8a8a 38%,
+      #eaeaff 50%,
+      #8a8a8a 62%,
+      #8a8a8a 100%
+    );
+    background-size: 220% 100%;
+    -webkit-background-clip: text;
+    background-clip: text;
+    -webkit-text-fill-color: transparent;
+    color: transparent;
+    animation:
+      loading-word-in 0.4s ease-out,
+      loading-shine 2.4s linear infinite;
+  }
+  @keyframes loading-shine {
+    0% { background-position: 160% 0; }
+    100% { background-position: -160% 0; }
+  }
+  @keyframes loading-word-in {
+    from { opacity: 0; transform: translateY(2px); }
+    to { opacity: 1; transform: translateY(0); }
+  }
+  .loading-meta {
+    font-size: 0.72rem;
+    color: #6a6a6a;
+    font-variant-numeric: tabular-nums;
+  }
+</style>
diff --git a/src/ui/SettingsPanel.svelte b/src/ui/SettingsPanel.svelte
index 05436c3..4748e49 100644
--- a/src/ui/SettingsPanel.svelte
+++ b/src/ui/SettingsPanel.svelte
@@ -3,6 +3,7 @@
   import ModelsSection from "./settings/ModelsSection.svelte";
   import StorageSection from "./settings/StorageSection.svelte";
   import HardwareSection from "./settings/HardwareSection.svelte";
+  import PerformanceSection from "./settings/PerformanceSection.svelte";
   import UsageSection from "./settings/UsageSection.svelte";
   import UpdatesSection from "./settings/UpdatesSection.svelte";
   import CloudMeshSection from "./settings/CloudMeshSection.svelte";
@@ -18,6 +19,7 @@
     | "prompts"
     | "permissions"
     | "hardware"
+    | "performance"
     | "storage"
     | "usage"
     | "cloud-mesh"
@@ -81,6 +83,7 @@
     { id: "prompts", label: "Prompts" },
     { id: "permissions", label: "Permissions" },
     { id: "hardware", label: "Hardware" },
+    { id: "performance", label: "Performance" },
     { id: "storage", label: "Storage" },
     { id: "usage", label: "Usage" },
     { id: "updates", label: "Updates" },
@@ -151,6 +154,8 @@
         <StorageSection setActive={(t) => (active = t)} />
       {:else if active === "hardware"}
         <HardwareSection setActive={(t) => (active = t)} />
+      {:else if active === "performance"}
+        <PerformanceSection />
       {:else if active === "usage"}
         <UsageSection />
       {:else if active === "cloud-mesh"}
diff --git a/src/ui/settings/PerformanceSection.svelte b/src/ui/settings/PerformanceSection.svelte
new file mode 100644
index 0000000..54d939c
--- /dev/null
+++ b/src/ui/settings/PerformanceSection.svelte
@@ -0,0 +1,244 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import { loadConfig, updateConfig } from "../../config";
+  import { scrollAffordance } from "../scroll-affordance";
+
+  let loading = $state(true);
+  let error = $state("");
+
+  /** Ollama `keep_alive` for chat: how long the model stays in memory after a
+   *  turn, in Ollama's duration syntax. Longer avoids reloads between messages;
+   *  shorter frees RAM/VRAM. Saved values outside the presets show as "Custom: …". */
+  let keepAlive = $state("30m");
+  const KEEP_ALIVE_OPTIONS: { value: string; label: string }[] = [
+    { value: "0", label: "Unload immediately (lowest memory)" },
+    { value: "5m", label: "5 minutes (Ollama default)" },
+    { value: "30m", label: "30 minutes (recommended)" },
+    { value: "1h", label: "1 hour" },
+    { value: "-1", label: "Until the app quits (keep resident)" },
+  ];
+
+  /** Throttle level for the Ollama server during a model load (goal: no machine freeze).
+   *  NOTE(review): hints below describe CPU priority, the card copy disk reads — confirm. */
+  type Throttle = "off" | "io" | "aggressive";
+  let throttle = $state<Throttle>("io");
+  const THROTTLE_OPTIONS: { value: Throttle; label: string; hint: string }[] = [
+    {
+      value: "off",
+      label: "Off (fastest load)",
+      hint: "No throttle. Loads fastest, but a big model can saturate the CPU and briefly freeze the machine.",
+    },
+    {
+      value: "io",
+      label: "Balanced (recommended)",
+      hint: "Lowers the model's priority a notch so the system — display, networking — keeps enough CPU to stay responsive during a load, while inference still gets the bulk of the cores.",
+    },
+    {
+      value: "aggressive",
+      label: "Aggressive (most responsive)",
+      hint: "Deeply deprioritizes the model. Keeps the desktop snappiest during a load, but token generation runs noticeably slower.",
+    },
+  ];
+
+  /** Whether to preload the chat model at app startup so the first message doesn't
+   *  wait on a load. On by default; that load is meant to run under the throttle above. */
+  let warmOnStartup = $state(true);
+
+  onMount(async () => { // load the saved settings once; missing keys keep the defaults
+    try {
+      const config = await loadConfig();
+      keepAlive = config.ollama_keep_alive ?? "30m";
+      throttle = THROTTLE_OPTIONS.find((o) => o.value === config.ollama_throttle)?.value ?? "io"; // unknown saved level → default, so the select always matches an option
+      warmOnStartup = config.warm_on_startup ?? true;
+    } catch (e) {
+      error = String(e); // shown in place of the settings cards
+    } finally {
+      loading = false;
+    }
+  });
+
+  async function patchWarmOnStartup(value: boolean): Promise<void> {
+    warmOnStartup = value; // optimistic: the checkbox flips before the save settles; NOTE(review): not reverted if it rejects
+    await updateConfig({ warm_on_startup: value });
+  }
+
+  async function patchKeepAlive(value: string): Promise<void> {
+    keepAlive = value; // optimistic: the select shows the choice before the save settles; NOTE(review): not reverted if it rejects
+    await updateConfig({ ollama_keep_alive: value });
+  }
+
+  async function patchThrottle(value: Throttle): Promise<void> {
+    throttle = value; // optimistic: select and hint update before the save settles; NOTE(review): not reverted if it rejects
+    await updateConfig({ ollama_throttle: value });
+  }
+
+  /** Hint for the selected throttle level; "" when none matches, which hides the hint paragraph. */
+  const activeThrottle = $derived(THROTTLE_OPTIONS.find(({ value }) => value === throttle));
+  const throttleHint = $derived(activeThrottle?.hint ?? "");
+</script>
+
+<div class="section">
+  <div class="head">
+    <p class="lede">
+      Tune how MyOwnLLM trades <strong>responsiveness</strong> against
+      <strong>load and inference speed</strong> when running models locally.
+      These apply to the Ollama server MyOwnLLM launches itself.
+    </p>
+  </div>
+
+  {#if loading}
+    <p class="loading">Loading…</p>
+  {:else if error}
+    <p class="error">{error}</p>
+  {:else}
+    <div class="scroll-affordance-wrap">
+    <div class="cards scroll-fade" use:scrollAffordance>
+      <div class="group-label">Model memory</div>
+
+      <div class="card">
+        <div class="card-title">Keep model loaded</div>
+        <p class="card-meta">
+          How long the chat model stays in memory after a reply. Longer
+          keeps later messages instant; shorter frees RAM/VRAM sooner —
+          handy when transcription needs to run alongside on a
+          memory-tight machine.
+        </p>
+        <dl class="info">
+          <div class="full">
+            <dt>Keep model loaded for</dt>
+            <dd>
+              <select
+                aria-label="Keep model loaded for"
+                value={keepAlive}
+                onchange={(e) => patchKeepAlive((e.currentTarget as HTMLSelectElement).value)}>
+                {#if !KEEP_ALIVE_OPTIONS.some((o) => o.value === keepAlive)}
+                  <option value={keepAlive}>Custom: {keepAlive}</option>
+                {/if}
+                {#each KEEP_ALIVE_OPTIONS as opt (opt.value)}
+                  <option value={opt.value}>{opt.label}</option>
+                {/each}
+              </select>
+            </dd>
+          </div>
+        </dl>
+      </div>
+
+      <div class="group-label">Loading</div>
+
+      <div class="card">
+        <div class="card-title">Load throttle</div>
+        <p class="card-meta">
+          Loading a model reads gigabytes from disk, which can freeze a
+          laptop. This throttles those reads so the machine stays usable.
+          Loading is disk-bound and inference is compute-bound, so the
+          balanced default eases disk only and leaves token generation at
+          full speed.
+        </p>
+        <dl class="info">
+          <div class="full">
+            <dt>While a model loads</dt>
+            <dd>
+              <select
+                aria-label="While a model loads"
+                value={throttle}
+                onchange={(e) => patchThrottle((e.currentTarget as HTMLSelectElement).value as Throttle)}>
+                {#each THROTTLE_OPTIONS as opt (opt.value)}
+                  <option value={opt.value}>{opt.label}</option>
+                {/each}
+              </select>
+            </dd>
+          </div>
+        </dl>
+        {#if throttleHint}
+          <p class="card-meta hint">{throttleHint}</p>
+        {/if}
+      </div>
+
+      <div class="card">
+        <div class="card-title">Warm at startup</div>
+        <p class="card-meta">
+          Preload the chat model in the background when the app starts, so
+          your first message doesn't wait for it to load. The load runs
+          under the throttle above, so it won't lock up the machine.
+        </p>
+        <label class="toggle">
+          <input
+            type="checkbox"
+            checked={warmOnStartup}
+            onchange={(e) => patchWarmOnStartup((e.currentTarget as HTMLInputElement).checked)}
+          />
+          Warm the chat model at startup
+        </label>
+      </div>
+
+      <p class="footnote">
+        Throttling only applies when MyOwnLLM starts the Ollama server
+        itself. If Ollama is already running as a system or tray service,
+        these settings don't affect it.
+      </p>
+    </div>
+    <div class="scroll-more-hint" aria-hidden="true">
+      <span class="scroll-more-chevron">⌄</span>
+      <span>more below</span>
+    </div>
+    </div>
+  {/if}
+</div>
+
+<style>
+  .section { display: flex; flex-direction: column; height: 100%; min-height: 0; } /* min-height: 0 so the column can shrink and .cards scrolls */
+  .head { padding: .75rem 1rem; border-bottom: 1px solid #1e1e1e; flex-shrink: 0; } /* header keeps its height */
+  .lede { font-size: .78rem; color: #888; line-height: 1.5; }
+  .lede strong { color: #ccc; font-weight: 600; }
+
+  .loading, .error { padding: 2rem; text-align: center; color: #555; font-size: .82rem; }
+  .error { color: #d66; }
+
+  .cards { flex: 1; overflow-y: scroll; padding: .75rem; display: flex; flex-direction: column; gap: .6rem; min-height: 0; --scroll-fade-bg: #111; } /* scrolling area; --scroll-fade-bg presumably feeds the shared .scroll-fade styling — confirm */
+  .group-label {
+    font-size: .68rem; color: #666; text-transform: uppercase;
+    letter-spacing: .06em; margin: .35rem .15rem -.1rem;
+  }
+  .group-label:first-child { margin-top: 0; }
+
+  .card {
+    border: 1px solid #1e1e1e;
+    background: #131318;
+    border-radius: 8px;
+    padding: .75rem .9rem;
+    display: flex; flex-direction: column; gap: .5rem;
+  }
+  .card-title { font-size: .9rem; font-weight: 600; color: #e8e8e8; }
+  .card-meta { font-size: .76rem; color: #888; line-height: 1.5; margin: 0; }
+  .card-meta.hint { color: #9a9ad6; } /* hint line for the selected throttle option */
+
+  .info { margin: 0; display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: .65rem; }
+  .info > div { display: flex; flex-direction: column; gap: .2rem; min-width: 0; }
+  .info > div.full { grid-column: 1 / -1; } /* span every grid column */
+  dt { font-size: .68rem; color: #666; text-transform: uppercase; letter-spacing: .03em; }
+  dd { margin: 0; font-size: .82rem; color: #ccc; display: flex; align-items: center; gap: .35rem; flex-wrap: wrap; }
+
+  select {
+    background: #0f0f12;
+    color: #e8e8e8;
+    border: 1px solid #2a2a2a;
+    border-radius: 6px;
+    padding: .3rem .4rem;
+    font-size: .8rem;
+    font-family: inherit;
+    max-width: 100%;
+  }
+  select:focus { outline: none; border-color: #6e6ef7; } /* border color stands in for the removed focus outline */
+
+  .footnote { font-size: .72rem; color: #555; line-height: 1.5; padding: .35rem .15rem 0; margin: 0; }
+
+  .toggle {
+    display: inline-flex;
+    align-items: center;
+    gap: .45rem;
+    font-size: .82rem;
+    color: #ccc;
+    cursor: pointer;
+  }
+  .toggle input { accent-color: #6e6ef7; }
+</style>
diff --git a/src/ui/settings/UsageSection.svelte b/src/ui/settings/UsageSection.svelte
index 4912282..150fd21 100644
--- a/src/ui/settings/UsageSection.svelte
+++ b/src/ui/settings/UsageSection.svelte
@@ -2,24 +2,7 @@
   import { onMount, onDestroy } from "svelte";
   import { invoke } from "@tauri-apps/api/core";
   import { scrollAffordance } from "../scroll-affordance";
-
-  // Mirrors the LiveSnapshot struct in src-tauri/src/usage.rs. Every
-  // field is optional on the Rust side so we can render "—" when a
-  // platform doesn't expose the underlying counter.
-  interface LiveSnapshot {
-    cpu_app_pct: number | null;
-    cpu_total_pct: number | null;
-    ram_app_bytes: number | null;
-    ram_total_bytes: number | null;
-    ram_used_bytes: number | null;
-    gpu_pct: number | null;
-    vram_app_bytes: number | null;
-    vram_used_bytes: number | null;
-    vram_total_bytes: number | null;
-    process_uptime_seconds: number;
-    cpu_brand: string | null;
-    cpu_count: number | null;
-  }
+  import type { LiveSnapshot } from "../../types";
 
   // Mirrors UsageStats in src-tauri/src/usage.rs.
   interface UsageStats {