From 92a562b29fbf6addf71d020dce6c952b55fd7084 Mon Sep 17 00:00:00 2001 From: iret77 <63622643+iret77@users.noreply.github.com> Date: Sat, 30 May 2026 15:47:19 +0200 Subject: [PATCH 01/27] =?UTF-8?q?fix:=20host-lifetime=20invariant=20?= =?UTF-8?q?=E2=80=94=20single=20exit=20authority=20(v0.5.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 1 of the stabilization plan (docs/architecture/stabilization-plan.md): collapse the host's lifetime onto one predicate and stop deriving "does anyone still need aiui?" from proxies (child count, window visibility) that 0.4.43–0.4.45 kept getting wrong. Single exit authority (Invariant I1): host_should_exit(explicit, cd_running) = explicit || !cd_running - lifetime.rs: pure host_should_exit + grace_outcome + ExitAuthority latch, all unit-tested (the Step-2 verification mini-harness: host survives a child flap while Claude Desktop runs; host exits on Claude-Desktop-quit). The shutdown watcher keeps the last-child-disconnect edge as a *trigger only* — it arms a short 5s grace (was 60s) and exits solely when Claude Desktop is gone and no child returned. One pgrep per edge, no continuous poll. - lib.rs: setup-window close = prevent_close + hide + Accessory demote (I2), never app.exit. RunEvent::ExitRequested = default-deny via host_should_exit; the child-count / pending-dialog veto is removed. ExitAuthority managed + latched by quit_app. - http.rs + updater.ts: both update-restart paths latch ExitAuthority before restart()/relaunch() so the default-deny gate honours case (c). - LifetimeStats child counter demoted to /health telemetry + start-trigger; it no longer gates exit. Prove-then-delete mapping for each retired exit path (grace-expired, setup-close-no-children, exit-requested-no-attached) is recorded in the plan doc. One deliberate deviation from the spec's prose (arm grace on every edge vs. "don't arm if CD alive") is documented there — it closes a CD-mid-quit race while preserving I1 exactly. Baseline + post-change green: cargo test (104 passed), clippy -D warnings, svelte-check (0 errors). Refs #137 Co-Authored-By: Claude Opus 4.8 (1M context) --- companion/src-tauri/Cargo.lock | 2 +- companion/src-tauri/Cargo.toml | 2 +- companion/src-tauri/src/http.rs | 8 + companion/src-tauri/src/lib.rs | 204 +++++++++---------- companion/src-tauri/src/lifetime.rs | 234 +++++++++++++++++++--- companion/src-tauri/tauri.conf.json | 2 +- companion/src/lib/updater.ts | 5 + docs/architecture/stabilization-plan.md | 248 ++++++++++++++++++++++++ 8 files changed, 577 insertions(+), 128 deletions(-) create mode 100644 docs/architecture/stabilization-plan.md diff --git a/companion/src-tauri/Cargo.lock b/companion/src-tauri/Cargo.lock index c400cc2..bf1470d 100644 --- a/companion/src-tauri/Cargo.lock +++ b/companion/src-tauri/Cargo.lock @@ -30,7 +30,7 @@ dependencies = [ [[package]] name = "aiui" -version = "0.4.46" +version = "0.5.0" dependencies = [ "axum", "base64 0.22.1", diff --git a/companion/src-tauri/Cargo.toml b/companion/src-tauri/Cargo.toml index 69a8acf..dfdd591 100644 --- a/companion/src-tauri/Cargo.toml +++ b/companion/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "aiui" -version = "0.4.46" +version = "0.5.0" description = "aiui companion — renders dialogs for remote Claude Code sessions" authors = ["byte5"] license = "" diff --git a/companion/src-tauri/src/http.rs b/companion/src-tauri/src/http.rs index a5b0ccc..b85ea9b 100644 --- a/companion/src-tauri/src/http.rs +++ b/companion/src-tauri/src/http.rs @@ -444,6 +444,14 @@ async fn update( let http_port = state.cfg.http_port; tokio::spawn(async move { tokio::time::sleep(Duration::from_millis(500)).await; + // Case (c): latch the single exit authority so the `ExitRequested` + // default-deny gate honours the restart-initiated exit instead of + // vetoing it (Invariant I1). `app.restart()` fires ExitRequested. + if let Some(auth) = + app_handle.try_state::>() + { + auth.authorize(); + } crate::housekeeping::pre_exit_cleanup(http_port, "updater-restart"); trace("update: restarting into new binary"); app_handle.restart(); diff --git a/companion/src-tauri/src/lib.rs b/companion/src-tauri/src/lib.rs index 768e8db..f953e03 100644 --- a/companion/src-tauri/src/lib.rs +++ b/companion/src-tauri/src/lib.rs @@ -432,6 +432,12 @@ fn open_url(url: String) -> Result<(), String> { /// running. Issue #72. #[tauri::command] async fn quit_app(app: tauri::AppHandle) -> Result<(), String> { + // Case (b): explicit uninstall. Latch the exit authority *first* so the + // `ExitRequested` default-deny gate honours the `app.exit(0)` below instead + // of vetoing it (Invariant I1). + if let Some(auth) = app.try_state::>() { + auth.authorize(); + } let killed = housekeeping::kill_all_mcp_stdio_children(); logging::trace(&format!( "quit_app: killed {killed} mcp-stdio child(ren) before exit" @@ -449,6 +455,21 @@ async fn quit_app(app: tauri::AppHandle) -> Result<(), String> { Ok(()) } +/// Latch the exit authority for case (c): an update-restart driven from the +/// frontend updater. `updater.ts` calls this immediately before +/// `@tauri-apps/plugin-process`'s `relaunch()`, which (like `app.restart()`) +/// fires `RunEvent::ExitRequested`. Without the latch the default-deny gate +/// would veto the relaunch and the update would never apply (Invariant I1). +/// The HTTP `/update` path latches the same authority directly in Rust. +#[tauri::command] +async fn authorize_exit_for_update( + exit_authority: tauri::State<'_, Arc>, +) -> Result<(), String> { + exit_authority.authorize(); + logging::trace("authorize_exit_for_update: exit authority latched for update-restart"); + Ok(()) +} + /// Quit + relaunch Claude Desktop so it re-reads `claude_desktop_config.json` /// and picks up the freshly-patched aiui MCP server entry. This is the /// "after-Setup nudge" the user otherwise has to figure out themselves. @@ -1173,6 +1194,12 @@ pub fn run() { let dialog_state = Arc::new(dialog::DialogState::new()); let ui_acks = Arc::new(ack::AckRegistry::new()); let lifetime_stats = Arc::new(lifetime::LifetimeStats::new()); + // Single exit authority (Invariant I1). Latched only by the two legitimate + // non-Wirt-death exits — uninstall (`quit_app`) and update-restart (HTTP + // `/update` + the frontend updater) — and read by the `ExitRequested` + // default-deny gate so those, and only those, Tauri-initiated terminations + // are honoured while Claude Desktop is alive. + let exit_authority = Arc::new(lifetime::ExitAuthority::new()); let tunnel_mgr = tunnel::TunnelManager::new(cfg.http_port); // Shared cell that records a fatal HTTP-server bind/serve failure (e.g. // port 7777 held by another process). Read by the `status` command and @@ -1258,6 +1285,7 @@ pub fn run() { .manage(dialog_state.clone()) .manage(ui_acks.clone()) .manage(lifetime_stats.clone()) + .manage(exit_authority.clone()) .manage(tunnel_mgr.clone()) .manage(http_error.clone()) .manage(pending_update.clone()) @@ -1282,6 +1310,7 @@ pub fn run() { restart_claude_desktop, uninstall_all, quit_app, + authorize_exit_for_update, dismiss_welcome, open_url ]) @@ -1561,34 +1590,28 @@ pub fn run() { Ok(()) }) .on_window_event(|window, event| { - // Multi-window lifecycle (v0.4.25, revised v0.4.36): + // Multi-window lifecycle (v0.4.25, revised v0.4.36, Invariant I2): // - // The setup window and the dialog window are independent. - // Closing one shouldn't kill the other — and definitely - // shouldn't kill the GUI process while the lifetime - // socket still has attached MCP-stdio children depending - // on it. + // The setup window and the dialog window are independent, and + // closing a window is never a process exit — the host lives with + // its Wirt (Claude Desktop), not with any window. // - // • Red X on setup window: setup goes away. If no other - // window is visible AND no MCP-stdio children are - // attached, the app quits and `mcp_attach`'s - // auto-resurrect path brings it back on the next tool - // call. As long as a child is attached, we stay alive - // headless — the lifetime grace timer (60s after the - // last child detaches) is the only legitimate - // "nobody needs aiui anymore" signal. - // • Red X on dialog window: the dialog is treated as - // cancelled (the frontend's CloseRequested-listener - // fires `dialog_cancel` first; this branch runs after). - // NEVER quits the app, regardless of any-visible state. - // The dialog window is per-call ephemeral — destroyed - // after every submit/cancel by `close_window`. Quitting - // the GUI here would tear down the HTTP server while - // the agent's tool call is still parsing the response, - // producing the 8s `wait_for_aiui` timeouts the user - // saw on 2026-05-04 (trace 16:11:42.197 "GUI is gone" - // 20 ms after a successful form submit). - if let tauri::WindowEvent::CloseRequested { .. } = event { + // • Red X on setup window: hide it + demote to Accessory (no Dock + // icon). The host stays alive headless; it is brought back to a + // visible Settings window via Dock-click / `open` (the Reopen + // handler) or the single-instance plugin. No exit here — the + // process only ends when the watcher sees the Wirt gone or an + // uninstall/update latches the exit authority. + // • Red X on dialog window: the dialog is treated as cancelled + // (the frontend's CloseRequested-listener fires `dialog_cancel` + // first; this branch runs after). NEVER quits the app. The + // dialog window is per-call ephemeral — destroyed after every + // submit/cancel by `close_window`. Quitting the GUI here would + // tear down the HTTP server while the agent's tool call is still + // parsing the response, producing the 8s `wait_for_aiui` + // timeouts the user saw on 2026-05-04 (trace 16:11:42.197 "GUI + // is gone" 20 ms after a successful form submit). + if let tauri::WindowEvent::CloseRequested { api, .. } = event { let app = window.app_handle(); let closed_label = window.label().to_string(); if closed_label == DIALOG_WINDOW_LABEL { @@ -1615,83 +1638,67 @@ pub fn run() { ); return; } - // Setup window: quit only if nothing else needs us. - let app_for_check = app.clone(); - let _ = app.run_on_main_thread(move || { - let any_visible = app_for_check - .webview_windows() - .iter() - .any(|(label, w)| { - label.as_str() != closed_label - && w.is_visible().unwrap_or(false) - }); - let attached = app_for_check - .try_state::>() - .map(|s| s.child_count()) - .unwrap_or(0); - if !any_visible && attached == 0 { - log::info!( - "[aiui] setup window closed and no MCP-stdio children attached — quitting; auto-resurrect will bring us back on next tool call" - ); - let port = app_for_check - .try_state::>() - .map(|c| c.http_port) - .unwrap_or(7777); - housekeeping::pre_exit_cleanup(port, "setup-close-no-children"); - app_for_check.exit(0); - } else { - log::debug!( - "[aiui] setup window closed, staying alive (visible_others={any_visible}, attached_children={attached})" - ); - } - }); + // Setup window: Invariant I2 — window close is NOT process + // exit. Hide the window and demote back to Accessory (no Dock + // icon) so aiui keeps living headless with its host (Claude + // Desktop), serving the lifetime socket + HTTP for local and + // remote dialogs. The process only ends when its Wirt quits + // (the watcher) or on an explicit uninstall/update — never + // because a user dismissed a window. The old + // `setup-close-no-children → app.exit(0)` path (which decided + // "nobody needs us" from the child count + window visibility, + // both proxies) is removed. + api.prevent_close(); + let _ = window.hide(); + #[cfg(target_os = "macos")] + { + let _ = app.set_activation_policy(tauri::ActivationPolicy::Accessory); + } + log::debug!( + "[aiui] setup window close → hidden + demoted to Accessory (host stays alive; I2)" + ); } }) .build(tauri::generate_context!()) .expect("error building tauri application") .run(|app, event| { - // ExitRequested handler (v0.4.43 introduced cleanup; v0.4.44 - // adds the veto for the "headless mode" case). Tauri fires - // ExitRequested on Cmd-Q, on ⌘W of the last visible + // ExitRequested gate — single exit authority (Invariant I1). Tauri + // fires ExitRequested on ⌘Q, on ⌘W / close of the last visible // window, on OS shutdown, and on `.restart()`. The - // last-window-close case is the dangerous one: as soon as - // the agent's dialog window closes after a submit, Tauri - // wants to terminate the process — but that's wrong while - // the GUI is meant to live headless serving the lifetime - // socket. v0.4.42 lost the GUI ~18 ms after every Dialog - // submit through this path (trace 2026-05-26 17:00:28.181 - // → 17:00:28.199); the dialog-window-close branch of - // on_window_event already returned without exit, but - // Tauri's default ExitRequested handler ran *after* it - // and killed the process anyway. + // last-window-close case is the dangerous one: as soon as the + // agent's dialog window closes after a submit, Tauri wants to + // terminate the process — but the host is meant to live headless + // serving the lifetime socket + HTTP. v0.4.42 lost the GUI ~18 ms + // after every dialog submit through exactly this path (trace + // 2026-05-26 17:00:28.181 → 17:00:28.199). // - // Resolution rule: - // • Anyone still depending on us — an attached - // mcp-stdio child or a pending dialog — ⇒ veto the - // exit via `api.prevent_exit()`. The lifetime-grace - // timer (60 s after the last child detaches) remains - // the *only* legitimate "everyone's gone, really - // exit" signal in normal operation. - // • Nobody attached and no pending dialog ⇒ honour the - // exit, but run pre_exit_cleanup first so any ssh-NTR - // tunnel children get SIGTERM instead of becoming - // launchd orphans. - if let tauri::RunEvent::ExitRequested { api, code, .. } = &event { - let attached = app - .try_state::>() - .map(|s| s.child_count()) - .unwrap_or(0); - let pending_dialogs = app - .try_state::>() - .map(|s| s.stats().orphan_count) - .unwrap_or(0); - - if code.is_none() && (attached > 0 || pending_dialogs > 0) { - // Tauri-initiated quit (no explicit exit code). - // Someone still needs us — keep the process alive. + // The decision no longer reads child count or window visibility — + // both were proxies that the 0.4.43–0.4.45 patches kept getting + // wrong. It is `host_should_exit(explicit, cd_running)`: honour the + // exit only when an uninstall/update latched `ExitAuthority`, or + // the Wirt (Claude Desktop) is already gone; otherwise default-deny + // via `api.prevent_exit()`. The watcher owns the CD-gone exit in + // normal operation; this gate is the backstop for every other + // Tauri-initiated termination. + if let tauri::RunEvent::ExitRequested { api, .. } = &event { + // Default-deny (Invariant I1). The only legitimate planned + // exits are: (b) uninstall / (c) update-restart — both latch + // `ExitAuthority` before asking Tauri to terminate — or (a) the + // Wirt (Claude Desktop) is already gone. Every other + // Tauri-initiated exit (last-window-close, ⌘Q, OS quit-all) is + // vetoed. This is what stops the headless host dying ~18 ms + // after a dialog submit (v0.4.42) and on overnight churn + // (v0.4.45): the child count and window visibility no longer + // enter the decision at all. + let explicit = app + .try_state::>() + .map(|a| a.is_authorized()) + .unwrap_or(false); + let cd_running = setup::is_claude_desktop_running(); + if !lifetime::host_should_exit(explicit, cd_running) { logging::trace(&format!( - "[aiui] veto tauri-exit-requested: attached_children={attached}, \ - pending_dialogs={pending_dialogs}" + "[aiui] veto ExitRequested (default-deny): explicit={explicit}, \ + claude_desktop_running={cd_running}" )); api.prevent_exit(); return; @@ -1701,11 +1708,12 @@ pub fn run() { .try_state::>() .map(|cfg| cfg.http_port) .unwrap_or(7777); - let reason = if code.is_some() { - "tauri-exit-requested-explicit" + let reason = if explicit { + "exit-authorized-uninstall-or-update" } else { - "tauri-exit-requested-no-attached" + "exit-claude-desktop-gone" }; + logging::trace(&format!("[aiui] honouring ExitRequested: {reason}")); housekeeping::pre_exit_cleanup(port, reason); } diff --git a/companion/src-tauri/src/lifetime.rs b/companion/src-tauri/src/lifetime.rs index 262c68d..3338874 100644 --- a/companion/src-tauri/src/lifetime.rs +++ b/companion/src-tauri/src/lifetime.rs @@ -4,10 +4,21 @@ //! a Windows named pipe on Windows — and each `aiui --mcp-stdio` child //! connects on startup and holds the stream open. When the child exits //! (Claude Desktop closes it), the OS tears down the stream and the GUI -//! observes an EOF. Once the last client disconnects the GUI starts a 60s -//! grace timer and exits if nobody re-connects. +//! observes an EOF. //! -//! Event-driven, no polling. +//! Lifetime invariant (I1, stabilization-plan): the child counter does **not** +//! decide the host's lifetime. The last-child-disconnect edge is only a +//! *trigger* — it prompts the one question that actually decides whether we may +//! exit: is our host, Claude Desktop, still alive? While Claude Desktop runs we +//! stay, regardless of child count (a dropped/re-spawned MCP server, Cowork +//! churn). Only when Claude Desktop is gone does a short grace then exit follow +//! (the host follows the Wirt). The 60 s child-count grace that used to gate +//! exit was itself the root-cause bug — it killed the host during ordinary +//! churn while Claude Desktop was very much alive. +//! +//! Event-driven, no continuous polling: the only liveness probe is a single +//! `is_claude_desktop_running()` call per disconnect edge (plus one re-check +//! after the short grace). //! //! Cross-platform note: the public surface (`socket_path`, `gui_serve`, //! `mcp_attach`, `LifetimeStats`) is identical on both OSes. The only @@ -29,7 +40,87 @@ use tokio::net::windows::named_pipe::{ }; use tokio::sync::Notify; -pub const SHUTDOWN_GRACE_SECS: u64 = 60; +/// Short grace after the last MCP-stdio child disconnects *and* Claude Desktop +/// is no longer detected, before the host exits. It absorbs two transients: +/// (a) a Claude Desktop quit→relaunch (its own update / a user restart), and +/// (b) the brief teardown window where Claude Desktop has already closed its +/// children's stdin (firing our edge) but its process is still terminating, so +/// `pgrep` could momentarily either way. We re-check Claude-Desktop liveness at +/// expiry and only exit if it is *still* gone. ≤5 s per the spec; nothing polls +/// in a loop. +pub const SHUTDOWN_GRACE_SECS: u64 = 5; + +/// The single exit authority (Invariant I1). A host *planned* exit is legitimate +/// in exactly three cases: (b) aiui is uninstalled or (c) restarting into an +/// update — both signalled explicitly via [`ExitAuthority`] by `quit_app` / +/// the updater — or (a) Claude Desktop, the host process aiui lives with, has +/// terminated. Every other process exit is a crash, never a clean shutdown. +/// +/// Pure so it can be unit-tested without a live Claude Desktop or a running +/// Tauri app: callers pass the two facts in. The impure shell reads them from +/// [`ExitAuthority`] state and `setup::is_claude_desktop_running()`. +pub fn host_should_exit(explicit_uninstall_or_update: bool, claude_desktop_running: bool) -> bool { + explicit_uninstall_or_update || !claude_desktop_running +} + +/// What the post-grace re-check decides. The child counter participates only as +/// "did a child come back" — it never independently authorizes an exit; that is +/// solely `!claude_desktop_running` (Invariant I1). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GraceOutcome { + /// Claude Desktop is alive again, or a child re-attached — stay headless. + Stay, + /// Claude Desktop is still gone and no child returned — the host follows + /// the Wirt and exits. + Exit, +} + +/// Decision made when the short grace expires. `child_returned` is true if any +/// MCP-stdio child re-attached during the grace; `claude_desktop_running` is a +/// fresh liveness probe. Exit only when the Wirt is gone *and* nothing came +/// back — Claude-Desktop liveness always wins (I1). +pub fn grace_outcome(child_returned: bool, claude_desktop_running: bool) -> GraceOutcome { + if claude_desktop_running || child_returned { + GraceOutcome::Stay + } else { + GraceOutcome::Exit + } +} + +/// Explicit exit authority for the two non-Wirt-death cases (uninstall, update +/// restart). A plain latch: set once by `quit_app` / the updater right before +/// they ask Tauri to terminate, read by the `ExitRequested` default-deny gate +/// so those — and only those — Tauri-initiated exits are honoured. Everything +/// else Tauri tries (last-window-close, ⌘Q, OS quit-all) is vetoed while Claude +/// Desktop is alive. +pub struct ExitAuthority { + authorized: std::sync::atomic::AtomicBool, +} + +impl ExitAuthority { + pub fn new() -> Self { + Self { + authorized: std::sync::atomic::AtomicBool::new(false), + } + } + + /// Latch the authority on. Irreversible by design — once we have decided to + /// uninstall or restart into an update there is no "un-deciding" before the + /// process is gone. + pub fn authorize(&self) { + self.authorized.store(true, Ordering::SeqCst); + } + + pub fn is_authorized(&self) -> bool { + self.authorized.load(Ordering::SeqCst) + } +} + +impl Default for ExitAuthority { + fn default() -> Self { + Self::new() + } +} /// Returns the per-OS handle the GUI listens on and MCP-stdio children /// connect to. @@ -74,10 +165,11 @@ impl LifetimeStats { } } -/// GUI-side: bind the channel, accept connections, and self-terminate after a -/// grace period once all clients are gone. Increments/decrements the shared -/// `conns` counter on every connect/disconnect so `/health` can report the -/// live child count without polling. +/// GUI-side: bind the channel and accept connections. Increments/decrements the +/// shared `conns` counter on every connect/disconnect so `/health` can report +/// the live child count without polling. When the last child leaves it pokes +/// the shutdown watcher, which exits *only* if the Wirt (Claude Desktop) is also +/// gone — the counter never terminates the process on its own (Invariant I1). /// /// Multi-instance hardening (since 0.4.33): if the channel already /// answers a connection, another aiui-app is alive and we are the @@ -276,10 +368,14 @@ async fn gui_serve_windows(sock: PathBuf, app: AppHandle, conns: Arc, app: AppHandle, http_port: u16) -> Arc { let wake = Arc::new(Notify::new()); let conns_w = conns.clone(); @@ -287,27 +383,52 @@ fn make_shutdown_watcher(conns: Arc, app: AppHandle, http_port: u16 tokio::spawn(async move { loop { wake_w.notified().await; + // Edge: the last MCP-stdio child just disconnected. The counter is + // only a trigger (I1) — it does not authorize an exit. If a child + // re-attached already, there is nothing to decide. if conns_w.load(Ordering::SeqCst) > 0 { continue; } + // Short grace, then let Claude-Desktop liveness — never the child + // count — make the call. The grace absorbs (a) a Claude Desktop + // quit→relaunch (its own update, a user restart) and (b) the + // teardown window where Claude Desktop has already closed our + // child's stdin (firing this very edge) but its process is still + // terminating, so a probe *now* could read either way. + // + // We arm the grace even when Claude Desktop currently looks alive: + // during ordinary churn it simply expires into `Stay`, and arming + // unconditionally is exactly what closes the teardown race that a + // "skip the grace if CD looks alive" shortcut would leave open — + // otherwise a probe catching CD mid-quit as "alive" would `Stay` + // with no further edge ever firing, stranding the host alive after + // its Wirt is gone (a Step-2 regression). The cost is a single 5 s + // timer + one `pgrep` per disconnect edge — no continuous poll. trace(&format!( - "lifetime: no clients, grace timer {SHUTDOWN_GRACE_SECS}s" + "lifetime: last child gone — grace {SHUTDOWN_GRACE_SECS}s then re-check Claude Desktop liveness" )); - tokio::select! { - _ = tokio::time::sleep(Duration::from_secs(SHUTDOWN_GRACE_SECS)) => { - if conns_w.load(Ordering::SeqCst) == 0 { - trace("lifetime: grace expired, exiting"); - // Hard exit bypassing Tauri's ExitRequested dance — - // Cmd-Q and window-close are deliberately blocked - // there, so the only legitimate shutdown path is - // this one. - crate::housekeeping::pre_exit_cleanup(http_port, "grace-expired"); - let _ = app; - std::process::exit(0); - } + tokio::time::sleep(Duration::from_secs(SHUTDOWN_GRACE_SECS)).await; + let child_returned = conns_w.load(Ordering::SeqCst) > 0; + let cd_running = crate::setup::is_claude_desktop_running(); + match grace_outcome(child_returned, cd_running) { + GraceOutcome::Stay => { + trace(&format!( + "lifetime: staying after grace \ + (claude_desktop_running={cd_running}, child_returned={child_returned})" + )); } - _ = wake_w.notified() => { - trace("lifetime: new client within grace, staying"); + GraceOutcome::Exit => { + trace( + "lifetime: Claude Desktop gone after grace and no child returned — \ + host follows Wirt, exiting", + ); + // Hard exit: this is exit case (a), the watcher's own + // authority. It bypasses Tauri's ExitRequested gate (which + // default-denies) because the gate has no way to know the + // watcher already established `!is_claude_desktop_running()`. + crate::housekeeping::pre_exit_cleanup(http_port, "claude-desktop-gone"); + let _ = app; + std::process::exit(0); } } } @@ -476,4 +597,63 @@ mod tests { // pure. Real behavior is exercised in integration tests. let _ = is_interactive_session(); } + + // --- Step-1 verification mini-harness (stabilization-plan §Step 2) --- + // + // The decision core is pulled out as pure functions so the two invariants + // can be asserted without a live Claude Desktop or a running Tauri app: + // * the host survives a child flap as long as Claude Desktop runs, and + // * the host exits once Claude Desktop quits. + // These mirror exactly the (child_returned, claude_desktop_running) facts + // the watcher reads at grace expiry and the (explicit, cd_running) facts + // the ExitRequested gate reads. + + #[test] + fn host_stays_while_claude_desktop_runs() { + // I1: with Claude Desktop alive and no explicit uninstall/update, the + // host may never plan an exit — whatever the child count did. + assert!(!host_should_exit(false, true)); + } + + #[test] + fn host_exits_when_claude_desktop_quits() { + // Case (a): Wirt gone, no explicit signal → exit authorized. + assert!(host_should_exit(false, false)); + } + + #[test] + fn host_exits_on_explicit_uninstall_or_update_even_if_cd_alive() { + // Cases (b)/(c): uninstall / update-restart authorize exit regardless + // of Claude-Desktop liveness. + assert!(host_should_exit(true, true)); + assert!(host_should_exit(true, false)); + } + + #[test] + fn child_flap_with_claude_desktop_alive_stays() { + // The pivotal regression case: the last child disconnected (Cowork + // churn / MCP re-spawn) but Claude Desktop is alive — STAY. This is the + // exact scenario the old 60 s child-count grace got wrong by exiting. + assert_eq!(grace_outcome(false, true), GraceOutcome::Stay); + // A child re-attaching during the grace also keeps us up, trivially. + assert_eq!(grace_outcome(true, true), GraceOutcome::Stay); + assert_eq!(grace_outcome(true, false), GraceOutcome::Stay); + } + + #[test] + fn claude_desktop_quit_with_no_child_exits() { + // Wirt gone after the grace and nothing came back → host follows Wirt. + assert_eq!(grace_outcome(false, false), GraceOutcome::Exit); + } + + #[test] + fn exit_authority_latches() { + let auth = ExitAuthority::new(); + assert!(!auth.is_authorized()); + auth.authorize(); + assert!(auth.is_authorized()); + // Idempotent — staying latched is the contract. + auth.authorize(); + assert!(auth.is_authorized()); + } } diff --git a/companion/src-tauri/tauri.conf.json b/companion/src-tauri/tauri.conf.json index 4df09da..0aef70a 100644 --- a/companion/src-tauri/tauri.conf.json +++ b/companion/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "../node_modules/@tauri-apps/cli/config.schema.json", "productName": "aiui", - "version": "0.4.46", + "version": "0.5.0", "identifier": "de.byte5.aiui", "build": { "frontendDist": "../dist", diff --git a/companion/src/lib/updater.ts b/companion/src/lib/updater.ts index ccac9ac..61ea73e 100644 --- a/companion/src/lib/updater.ts +++ b/companion/src/lib/updater.ts @@ -93,5 +93,10 @@ export async function checkForUpdates(opts: { silent?: boolean } = {}): Promise< } catch (e) { console.debug(`[aiui] clear_pending_update failed (continuing): ${e}`); } + // Invariant I1: the host's ExitRequested gate default-denies every + // Tauri-initiated exit. `relaunch()` fires ExitRequested, so we must latch + // the exit authority first (case (c), update-restart) or the relaunch would + // be vetoed and the freshly-installed update would never take effect. + await invoke("authorize_exit_for_update"); await relaunch(); } diff --git a/docs/architecture/stabilization-plan.md b/docs/architecture/stabilization-plan.md new file mode 100644 index 0000000..351c201 --- /dev/null +++ b/docs/architecture/stabilization-plan.md @@ -0,0 +1,248 @@ +# aiui Stabilization Plan (locked spec — guards against drift) + +Status: Step 1 implemented (Refs #137); Steps 2–4 awaiting sign-off. +See "Step 1 — implementation record" at the end of this document. +Origin: root-cause analysis 2026-05-29 (Opus 4.8 code analysis + independent +Codex diagnosis, convergent; external validation of the three pivotal facts). + +The instability is not a bug-swarm but **one architectural fault with many +symptoms**: aiui fuses three components with different lifecycles into one +binary whose lifetime hangs on a fragile proxy (count of mcp-stdio children +attached to a socket + 60 s grace) instead of on the real signal (is the host +Claude Desktop alive). The remote path additionally lacks the resurrection and +cold-start retry the local path has. + +This plan is measured against the invariants below. Every change either +establishes an invariant or is out of scope. + +## Invariants (the contract) + +- **I1 — Host planned-exit ONLY in three cases:** (a) Claude Desktop terminates, + (b) aiui is uninstalled, (c) update-restart. *Every other process exit is a + crash* — logged as such, never traced as a clean shutdown. +- **I2 — Window close ≠ process exit.** Red X / ⌘W / "Beenden" on any window = + hide window (+ demote to Accessory if no dialog remains). Never `app.exit`. +- **I3 — Tunnel owner = the Mac (SSH client), always.** Structurally forced: + Mac→remote reachability is the precondition of the whole setup; remote→Mac is + never guaranteed. The remote bridge is a passive forwarder; only the Mac can + establish/repair the tunnel. +- **I4 — Remote bridge planned-exit ONLY:** stdin-EOF (its Claude Code session + ended) or deregistration. *Never* killed from the Mac to force a version. +- **I5 — Nothing kills a process / tunnel / bridge that may hold an in-flight + request.** Graceful drain before any teardown. +- **I6 — Local and remote bridges have identical resilience semantics** + (cold-start poll, async-render polling, progress notifications, error + classification, timeouts). +- **I7 — Every render resolves to exactly one terminal outcome** (kept from + current behaviour). +- **I8 — Multi-window:** N concurrent dialogs allowed; **each window carries a + human-legible session identifier** so the user can tell which session a dialog + belongs to. + +## Step 1 — Host lifetime invariant (decided; highest leverage, lowest risk) + +Files: `lifetime.rs`, `lib.rs`. + +Single exit authority. Introduce one predicate consulted by every +exit-candidate path: + + fn host_should_exit(reason) -> bool + = explicit_uninstall_or_update // cases (b)/(c): set by quit_app / updater + || !setup::is_claude_desktop_running() // case (a): the real Wirt signal + +- **`lifetime::make_shutdown_watcher` (grace-expired):** keep the *edge* (last + child disconnected) but, on that edge, consult `is_claude_desktop_running()` + (already exists, `pgrep -f /Applications/Claude.app/`). CD alive → **do not + arm grace, do not exit** (CD merely dropped/restarted the aiui MCP server, or + Cowork churn). CD gone → short grace (≤5 s) then exit. This reuses an existing + edge + an existing helper → **no objc bridge, no continuous poll tick.** + - When CD quits it closes its children's stdin → EOF → they exit → disconnect + → the edge fires → host exits. The edge therefore *does* fire on the only + legitimate "Wirt endet" event. + - Optional enhancement (later): an `NSWorkspaceDidTerminateApplication` + observer (objc2) for instant detection. Not required for correctness. +- **Setup-window `CloseRequested` (lib.rs `on_window_event`):** `api.prevent_close()` + + `window.hide()` + demote to Accessory. Remove the + `setup-close-no-children → app.exit(0)` path entirely. +- **`RunEvent::ExitRequested` (lib.rs):** **default-deny** — `api.prevent_exit()` + for *every* Tauri-initiated exit. Only the explicit paths (quit_app / updater + restart) and the watcher's CD-gone exit may terminate. Removes the + veto-by-child-count logic. +- **`LifetimeStats` child counter:** demoted to telemetry (`/health`) + the + start-trigger only (`mcp_attach` first attach → `open --auto`). It **never** + gates exit again. + +No-regression: the grace-exit/idle-exit existed to reap stale state; that is now +covered by the existing `disk_version_if_stale` self-check + the housekeeping +sweeps, which stay. + +## Step 2 — Remote bridge never killed to force a version (decided) + +Files: `setup.rs`, `lib.rs`, `http.rs`, `python/.../server.py`. + +- Remove `kill_remote_mcp_stdio` / `pkill -f 'aiui-mcp'` from the GUI-startup + remote-pin loop (`lib.rs`) and from `resync_remote` / `add_remote` re-add. + Patch the pin in `~/.claude.json`; it takes effect at the **next natural + spawn**. A live session keeps its version until it ends. +- Keep an outbound kill ONLY for true deregistration (`remove_remote` / + uninstall), and even there prefer config-removal + natural session end over a + broad `pkill`. If a kill is kept, scope it precisely (never blunt `-f + aiui-mcp`, which has cross-session blast radius — the remote twin of the + 0.4.42 Cowork-kill). +- Cooperative version floor (replaces external enforcement): add `wire_version` + to `/version` + `/probe`. The bridge reads it on connect; on a hard + incompatibility it returns a **structured tool error** ("incompatible aiui + versions — restart this Claude Code session"), never gets killed, never + crashes. Tolerate ordinary version skew (the wire contract is versioned and + stable). + +## Step 3 — Async render + bridge parity (decided; closes the ReadError class) + +Files: `http.rs`, `dialog.rs`, `mcp.rs`, `python/.../server.py`. + +- **Async `/render` (RFC point #1, never built):** `POST /render` registers the + dialog and returns `{id, ttl}` immediately. New `GET /render/{id}` is a + bounded long-poll (~25 s) returning `{pending}` or the terminal result. + Removes the multi-minute open HTTP connection that any GUI/tunnel blip turns + into ReadError. +- **Both bridges identical:** POST then loop `GET /render/{id}` until terminal + or the client gives up; emit `notifications/progress` each iteration. +- **Python bridge to full parity (I6):** + - Add the `wait_for_aiui` cold-start poll (`/ping` up to ~30 s) before + posting — mirror the Rust bridge. Replaces the brittle single 3 s preflight. + - Add progress notifications (FastMCP progress API — verify exact call). + - Switch to the async-render polling loop above. + - Classify errors precisely: TCP-refused (tunnel down) vs. connected-but-no- + HTTP (= ReadError: tunnel up, Mac down) vs. 401 (token). Today the remote + `ConnectError` branch is dead and ReadError gets a misleading message. + +## Step 4 — Tunnel mechanism + multi-window (one open decision) + +Files: `tunnel.rs`, `setup.rs`, `lib.rs`, `dialog.rs`, `http.rs`, frontend. + +### Tunnel — DECISION PENDING, settled empirically (not by taste) + +Deciding facts to establish first: +1. Does the SSH connection Claude Desktop opens to the remote carry a + `RemoteForward 7777` from `~/.ssh/config`? (Very likely the reason the code + migrated away from piggyback.) +2. Is concurrent multi-session **on the same remote host** a requirement? + +- **If (1) yes and (2) no → Piggyback** (original design): aiui patches + `RemoteForward` into the host's `~/.ssh/config` block; **delete the dedicated + `ssh -NTR` TunnelManager + orphan-sweeps + shared-forward detection** (big + complexity removal). Inherits the user's working auth (agent-fwd / ProxyJump / + MFA). +- **If (1) no or (2) yes → aiui-dedicated** (clean up current code): keep the + Mac-owned `ssh` but fix lifecycle/ownership; requires a non-interactive + BatchMode key Mac→remote. +- Either way: **exactly one Mac-side owner.** Today's bug is running both in + parallel (one remote port, ExitOnForwardFailure collision → stale port → + ReadError). End-to-end tunnel health = probe `/probe` *through* the tunnel, + not "ssh process alive 2 s". + +### Multi-window (I8) + +- Drop single-occupancy: remove `try_register`'s "reject if any pending → 409". + Allow N concurrent dialogs (registry already supports `DIALOG_HARD_CAP`). +- **One window per render**, window label = dialog id (replaces the single + reused `DIALOG_WINDOW_LABEL`). Teardown keyed by id. +- **Session identifier (I8):** + - Render spec gains a `session` field (string), set by the caller; tool + wrappers (`mcp.rs`, `server.py`) gain a `session` param. Skill + tool + descriptions instruct the agent to pass a short human label (project name + etc.). + - The **remote Python bridge auto-injects its `hostname`** (or the registered + alias) as `session_origin`, so the user always sees which host a dialog came + from even if the agent passes nothing (the Mac cannot distinguish remotes at + `:7777` — all share one port — so origin must come from the caller side). + - Window chrome (title bar / header chip) shows `session` + `session_origin`. + - Fallback when the agent passes nothing: `session_origin` + short id. + +## Cross-cutting — observability + test harness (makes "no regression" real) + +- Promote the ad-hoc trace into an explicit named lifecycle state machine + + event log (states: cold → serving → headless-idle → draining → exit(reason)). +- **Integration test harness for the remote path** — the layer with zero + coverage today (only pure-function unit tests exist), which is *why* this has + been whack-a-mole. Scenarios it must exercise before each release: + tunnel-down, GUI-down-mid-call, update-mid-call, parallel sessions (same + + different remotes), Claude-Desktop-quit, Claude-Desktop-restart. + +## Sequencing & no-regression guardrail + +Order: **1 → 2 → 3 → 4.** Each step is independently shippable and verifiable. +Steps 1–3 deliver the bulk of the stability gain and do **not** depend on the +tunnel decision. + +For every mechanism this plan retires (grace-exit, remote `pkill`, +single-occupancy 409, and the dedicated tunnel if we choose piggyback): name the +original incident it guarded against and show the new model covers it **before** +deleting it. Each retired path was added for a real failure — the replacement +must demonstrably subsume that failure. + +## Step 1 — implementation record (shipped under Refs #137) + +Files touched: `lifetime.rs`, `lib.rs`, `http.rs`, `src/lib/updater.ts`. + +Single exit authority is now real: + +``` +host_should_exit(explicit, cd_running) = explicit || !cd_running +``` + +`explicit` is an `ExitAuthority` latch (an `AtomicBool` in `lifetime.rs`, +`manage`d in `lib.rs`) set only by `quit_app` (uninstall), the HTTP `/update` +restart path (`http.rs`), and the frontend update-restart +(`authorize_exit_for_update`, called from `updater.ts` before `relaunch()`). +`cd_running` is `setup::is_claude_desktop_running()`. The predicate is a pure +function and unit-tested. + +Wiring: + +- **`lifetime::make_shutdown_watcher`** — keeps the last-child-disconnect edge + as a *trigger only*. On the edge it arms a short grace (`SHUTDOWN_GRACE_SECS`, + now 5 s) and then decides via the pure `grace_outcome(child_returned, + cd_running)`: exit only if Claude Desktop is gone *and* no child returned; + otherwise stay. No continuous poll — one `pgrep` per edge. +- **Setup-window `CloseRequested`** (`lib.rs`) — `api.prevent_close()` + + `window.hide()` + Accessory demote. The `setup-close-no-children → + app.exit(0)` path is gone. +- **`RunEvent::ExitRequested`** (`lib.rs`) — default-deny: `api.prevent_exit()` + unless `host_should_exit`. The child-count / pending-dialog veto is gone. +- **`LifetimeStats`** — counter retained for `/health` telemetry and the + `mcp_attach` start-trigger; it no longer reads into any exit decision. + +### Verification mini-harness (Step 2 of the work order) + +Pure decision functions are unit-tested in `lifetime.rs` so both invariants are +asserted without a live Claude Desktop or a running Tauri app — exactly the two +facts the runtime reads: + +- `child_flap_with_claude_desktop_alive_stays` — `grace_outcome(false, true) = + Stay`: the host survives a child flap while CD runs (the case the old 60 s + grace got wrong). +- `claude_desktop_quit_with_no_child_exits` — `grace_outcome(false, false) = + Exit`: the host follows the Wirt on CD-quit. +- `host_*` tests pin the three legitimate exits (uninstall, update, CD-gone). + +### Prove-then-delete (Step 3 of the work order) + +| Retired path | Original incident it guarded | New model that subsumes it | +|---|---|---| +| `grace-expired` (60 s after last child, child-count gated) | reap stale state when "nobody needs aiui" | the only legitimate "nobody needs us" signal is **Wirt gone** → `claude-desktop-gone` exit. Stale-binary / multi-instance reaping stays via `disk_version_if_stale` + housekeeping sweeps (untouched). The 60 s-on-child-count exit *was itself the bug* — it killed the host during Cowork churn / MCP re-spawn while CD was alive. | +| `setup-close-no-children → app.exit(0)` | let the app get out of the way when the user closed Settings and no children were attached (Issue #72 lineage) | I2: window close = hide + Accessory demote. The host stays headless with its Wirt; the Dock icon is dropped so "getting out of the way" no longer needs process death. Truly removing aiui is uninstall (`quit_app`, explicit). | +| `tauri-exit-requested-no-attached` (veto-by-child-count) | 0.4.42 lost the GUI ~18 ms after every dialog submit via Tauri's last-window-close ExitRequested; 0.4.44 added a child-count veto | default-deny `host_should_exit`: every Tauri-initiated exit is vetoed unless explicit uninstall/update or CD gone. The post-submit last-window-close exit is now unconditionally vetoed (CD alive, not explicit) → host survives. Child count no longer participates. | + +### One deliberate deviation from the spec's prose + +The spec text says "CD alive → **do not arm grace**". The implementation arms +the 5 s grace on *every* last-child-disconnect edge and gates the *exit* on a +fresh `is_claude_desktop_running()` probe at expiry. This is functionally +identical for the churn case (grace expires into `Stay`) but closes a race the +literal wording leaves open: if the edge fires while CD is mid-quit and `pgrep` +still matches a terminating helper, a "skip grace if CD looks alive" shortcut +would `Stay` with no further edge to re-trigger — stranding the host alive after +its Wirt is gone, a Step-2 ("host exits on CD-quit") regression. Invariant I1 is +preserved exactly: the exit is gated on `!is_claude_desktop_running()`; CD-alive +never exits. From 7f2497299b9c8459f25b3860c18d7a204997c3b0 Mon Sep 17 00:00:00 2001 From: iret77 <63622643+iret77@users.noreply.github.com> Date: Sat, 30 May 2026 16:02:21 +0200 Subject: [PATCH 02/27] =?UTF-8?q?fix:=20cancellation-safe=20/render=20?= =?UTF-8?q?=E2=80=94=20kill=20409-storm=20+=20stranded=20empty=20window?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acute bug from the 2026-05-30 test: after a dialog the next aiui call gets "409 Conflict — companion busy" repeatedly, and an empty dialog window is left on screen. Root cause is a cancellation-safety hole in the synchronous /render handler, independent of the Step-1 host-lifetime work: /render registers a dialog, surfaces a window, then parks on timeout(DIALOG_TTL=2h, result_rx). The MCP client gives up far sooner — the local Rust bridge's reqwest client times out at 300s — and on any client-side give-up (timeout, ReadError, tunnel blip, slow dialog) Axum drops the handler future. None of the explicit teardown then runs, so the registry entry sits pending for the full 2h TTL (every subsequent /render → 409) and the already-surfaced window strands empty. Fix: a RenderGuard (RAII) armed right after try_register and run on *any* drop, including the future-cancelled path the explicit teardown can't reach. On drop it cancels the registry entry (frees the slot immediately) and destroys the dialog window. Disarmed after the normal terminal teardown so precise behaviour is unchanged; cancel is a no-op once the entry is gone and destroy_dialog_window is idempotent, so an over-fire is harmless. The ui_unreachable 503 path now also gets its surfaced window torn down (it previously left it stranded). This is a targeted robustness fix, NOT the spec's Step 3 async /render (which removes the held connection entirely and properly supports long human fills); noted as such in the plan doc. DIALOG_TTL is deliberately left at 2h — the leak was the missing drop-cleanup, not the TTL (v0.4.41 raised it for slow forms on purpose). Tests: RenderGuard armed-drop frees the slot + delivers a cancelled result; disarmed-drop leaves the terminal path untouched. cargo test 106 passed, clippy -D warnings clean. Refs #137 Co-Authored-By: Claude Opus 4.8 (1M context) --- companion/src-tauri/src/http.rs | 126 ++++++++++++++++++++++++ docs/architecture/stabilization-plan.md | 13 +++ 2 files changed, 139 insertions(+) diff --git a/companion/src-tauri/src/http.rs b/companion/src-tauri/src/http.rs index b85ea9b..362fa26 100644 --- a/companion/src-tauri/src/http.rs +++ b/companion/src-tauri/src/http.rs @@ -517,6 +517,65 @@ fn validate_spec(spec: &serde_json::Value) -> Result<(), (String, String)> { Ok(()) } +/// RAII cleanup for a registered render — closes the cancellation-safety hole +/// behind the 409-storm + stranded-empty-window pair (2026-05-30 report). +/// +/// `/render` registers a dialog, surfaces a window, then parks on +/// `timeout(DIALOG_TTL=2h, result_rx)`. The MCP client gives up far sooner — +/// the local Rust bridge's reqwest client times out at 300 s — and on any +/// client-side give-up (timeout, ReadError, tunnel blip, slow dialog) Axum +/// **drops this handler future**. None of the explicit teardown below then +/// runs, so the registry entry sits pending for the full 2 h TTL — every +/// subsequent `/render` gets a 409 — and the already-surfaced window is left +/// stranded empty. +/// +/// This guard is armed right after `try_register` and runs on *any* drop, +/// including the future-cancelled case the explicit paths can't reach: it +/// cancels the registry entry (freeing the slot immediately) and destroys the +/// dialog window. It is disarmed once the handler completes its own terminal +/// teardown, so the normal paths keep their precise behaviour and we don't +/// double-hop the main thread. `dialog.cancel` is a no-op once the entry is +/// gone and `destroy_dialog_window` is idempotent, so an over-fire is harmless. +/// +/// Note: this is a targeted robustness fix, not the spec's Step 3 (async +/// `/render`), which removes the multi-minute held connection entirely. It +/// makes the *current* synchronous handler cancellation-safe in the meantime. +struct RenderGuard { + id: String, + dialog: Arc, + /// `None` only in unit tests, where no Tauri app exists to host a window. + app: Option, + armed: bool, +} + +impl RenderGuard { + fn disarm(&mut self) { + self.armed = false; + } +} + +impl Drop for RenderGuard { + fn drop(&mut self) { + if !self.armed { + return; + } + trace(&format!( + "render: handler future dropped before terminal teardown — \ + cleaning up id={} (cancel registry entry + destroy window)", + self.id + )); + // Free the registry slot so the next /render isn't 409'd for 2 h. + self.dialog.cancel(&self.id); + // Tear down the surfaced window so it can't strand empty. + if let Some(app) = &self.app { + let app_for_destroy = app.clone(); + let _ = app.run_on_main_thread(move || { + crate::destroy_dialog_window(&app_for_destroy) + }); + } + } +} + async fn render( State(state): State, headers: HeaderMap, @@ -591,6 +650,15 @@ async fn render( } }; trace(&format!("render: registered id={}", id)); + // Cancellation-safety net: from here until the explicit terminal teardown + // below, a dropped handler future (client give-up) must not leak the + // registry entry or strand the window. See `RenderGuard`. + let mut guard = RenderGuard { + id: id.clone(), + dialog: state.dialog.clone(), + app: Some(state.app.clone()), + armed: true, + }; let dr = DialogRequest { id: id.clone(), spec: req.spec, @@ -800,6 +868,9 @@ async fn render( .app .run_on_main_thread(move || crate::destroy_dialog_window(&app_for_destroy)); } + // Terminal teardown done explicitly above — stand the guard down so it + // doesn't redundantly re-cancel/re-destroy on scope exit. + guard.disarm(); // Lifecycle-driven update check (#42): fire once after every // successful render. Frontend gates with a 30-min cooldown so this is @@ -965,3 +1036,58 @@ mod validate_tests { assert!(validate_spec(&spec).is_err()); } } + +#[cfg(test)] +mod render_guard_tests { + use super::RenderGuard; + use crate::dialog::DialogState; + use std::sync::Arc; + + // Regression: the 409-storm + stranded-empty-window pair (2026-05-30). + // When the /render handler future is dropped (client give-up) the registry + // entry must be freed immediately, not left pending for the 2 h TTL. The + // window-destroy half needs a Tauri app, so these cover the registry half + // (`app: None`) — the half that produces the 409. + + #[test] + fn armed_guard_drop_frees_registry_slot() { + let ds = Arc::new(DialogState::new()); + let (id, result_rx, _ack) = ds.try_register().expect("first register is free"); + assert_eq!(ds.stats().orphan_count, 1); + { + let _guard = RenderGuard { + id: id.clone(), + dialog: ds.clone(), + app: None, + armed: true, + }; + // future "dropped" here + } + // Slot freed → the next render would NOT get a 409. + assert_eq!(ds.stats().orphan_count, 0); + assert!(ds.try_register().is_ok(), "registry is free again after guard cleanup"); + // The awaiter observes a cancelled terminal result, not a hang. + let r = result_rx.blocking_recv().expect("result_tx sent on cancel"); + assert!(r.cancelled); + } + + #[test] + fn disarmed_guard_drop_leaves_terminal_path_untouched() { + // The normal terminal path disarms after its own teardown; the guard + // must then do nothing (no double-cancel, no spurious slot churn). + let ds = Arc::new(DialogState::new()); + let (id, _result_rx, _ack) = ds.try_register().unwrap(); + { + let mut guard = RenderGuard { + id: id.clone(), + dialog: ds.clone(), + app: None, + armed: true, + }; + guard.disarm(); + } + // Entry untouched by the disarmed guard (the real handler's explicit + // `complete`/`cancel` owns removal on the terminal path). + assert_eq!(ds.stats().orphan_count, 1); + } +} diff --git a/docs/architecture/stabilization-plan.md b/docs/architecture/stabilization-plan.md index 351c201..d8f67c1 100644 --- a/docs/architecture/stabilization-plan.md +++ b/docs/architecture/stabilization-plan.md @@ -100,6 +100,19 @@ Files: `setup.rs`, `lib.rs`, `http.rs`, `python/.../server.py`. Files: `http.rs`, `dialog.rs`, `mcp.rs`, `python/.../server.py`. +> **Interim fix already landed (2026-05-30, with the Step-1 PR #137):** the +> acute 409-storm + stranded-empty-window pair was a *cancellation-safety* hole, +> not the full async gap. The synchronous `/render` handler parks on +> `timeout(DIALOG_TTL=2h, result_rx)` while the MCP client gives up far sooner +> (the local Rust bridge times out at 300 s). On client give-up Axum drops the +> handler future, and none of the explicit teardown ran → the registry entry +> leaked for 2 h (409 for every later render) and the surfaced window stranded +> empty. A `RenderGuard` (RAII) now cancels the entry + destroys the window on +> *any* drop, including the future-cancelled case. This makes the current +> handler cancellation-safe; the async-render protocol below still supersedes it +> (it removes the held connection entirely and is what properly supports long +> human fills without the client timing out at all). + - **Async `/render` (RFC point #1, never built):** `POST /render` registers the dialog and returns `{id, ttl}` immediately. New `GET /render/{id}` is a bounded long-poll (~25 s) returning `{pending}` or the terminal result. From 8419489b67be0bbbaf812999af643b6a841ff66f Mon Sep 17 00:00:00 2001 From: iret77 <63622643+iret77@users.noreply.github.com> Date: Sat, 30 May 2026 16:28:48 +0200 Subject: [PATCH 03/27] =?UTF-8?q?fix:=20Step=202=20=E2=80=94=20never=20kil?= =?UTF-8?q?l=20a=20remote=20bridge=20to=20force=20a=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the version-forcing remote kills and replaces external enforcement with a cooperative wire-version floor (stabilization-plan Step 2). `setup::kill_remote_mcp_stdio` was `ssh … pkill -f 'aiui-mcp'`: a blunt sweep that crashed live remote sessions mid-call (Claude Code does NOT respawn a disconnected MCP) and matched *every* aiui-mcp on the host — the remote twin of the 0.4.42 Cowork-kill, and outright unsafe now that parallel sessions per remote are a requirement. Deleted, along with all three callers: - GUI-startup remote-pin loop (lib.rs) - add_remote re-add sweep - resync_remote (now re-pin only) The version pin in ~/.claude.json takes effect at the next natural spawn; a live session keeps its version until it ends on its own. Deregistration (remove_remote / uninstall_all) already used config-removal + tunnel-stop, no kill — unchanged. Cooperative floor: WIRE_VERSION=1 in http.rs, surfaced on /version + /probe. The Python bridge's _check_wire_compat reads it once per process and raises a structured "restart this session" tool error only on a hard wire mismatch; an absent field is treated as v1 and transient /version read errors are tolerated (ordinary app-version skew never blocks). The native Rust bridge is the same binary as the companion, so it needs no floor check. Tests: cargo test 105 (−1 = deleted kill test), clippy -D warnings clean, python 21 (4 new wire-compat). Refs #137 Co-Authored-By: Claude Opus 4.8 (1M context) --- companion/src-tauri/src/http.rs | 16 ++++ companion/src-tauri/src/lib.rs | 70 +++++++--------- companion/src-tauri/src/setup.rs | 70 ++-------------- docs/architecture/stabilization-plan.md | 14 ++++ python/src/aiui_mcp/server.py | 54 ++++++++++++ python/tests/test_wire_compat.py | 104 ++++++++++++++++++++++++ 6 files changed, 228 insertions(+), 100 deletions(-) create mode 100644 python/tests/test_wire_compat.py diff --git a/companion/src-tauri/src/http.rs b/companion/src-tauri/src/http.rs index 362fa26..f30f978 100644 --- a/companion/src-tauri/src/http.rs +++ b/companion/src-tauri/src/http.rs @@ -111,9 +111,23 @@ struct ChildrenHealth { attached: usize, } +/// Wire-contract version (Step 2, cooperative version floor). Bumped ONLY when +/// the HTTP request/response shapes between the bridges and the companion +/// change incompatibly — independent of the app's release version, which moves +/// on every fix. Both bridges read it from `/version` (and `/probe`) and, on a +/// hard mismatch, return a structured "restart this session" tool error instead +/// of being externally killed. Ordinary app-version skew is tolerated as long +/// as `wire_version` matches. +/// +/// v1: the original `{spec}` → `{id,cancelled,result,reason}` contract. +pub const WIRE_VERSION: u32 = 1; + #[derive(Serialize)] struct VersionResponse { version: String, + /// See [`WIRE_VERSION`]. Surfaced so bridges can enforce a cooperative + /// compatibility floor without anyone killing anyone. + wire_version: u32, build_info: String, binary_path: String, updater_endpoint: String, @@ -222,6 +236,7 @@ async fn probe( Json(serde_json::json!({ "aiui": true, "version": env!("CARGO_PKG_VERSION"), + "wire_version": WIRE_VERSION, "pid": std::process::id(), "build_sha": env!("AIUI_GIT_SHA"), })) @@ -345,6 +360,7 @@ async fn version( } Ok(Json(VersionResponse { version: env!("CARGO_PKG_VERSION").to_string(), + wire_version: WIRE_VERSION, build_info: crate::logging::BUILD_INFO.to_string(), binary_path: crate::setup::app_binary_path(), updater_endpoint: diff --git a/companion/src-tauri/src/lib.rs b/companion/src-tauri/src/lib.rs index f953e03..a74c5bb 100644 --- a/companion/src-tauri/src/lib.rs +++ b/companion/src-tauri/src/lib.rs @@ -716,15 +716,11 @@ async fn add_remote( ); let config_ok = config_step.ok; results.push(config_step); - // Fresh add — there shouldn't be a running child yet, but a - // re-add (Remove + Add the same host) leaves stale ones; sweep - // them so the first tool call respawns clean against the new pin. - if matches!(config_patch, Some(setup::RemoteConfigPatch::Patched)) { - let sweep = setup::kill_remote_mcp_stdio(&host_alias); - if !sweep.ok { - results.push(sweep); - } - } + // Step 2: no version-forcing kill here. A re-add re-pins the version in + // ~/.claude.json; the new pin takes effect at the next natural Claude Code + // spawn. We never `pkill -f aiui-mcp` to force it — that crashed live + // sessions mid-call and hit every other session on the host. + let _ = config_patch; if !(token_ok && config_ok) { // Don't persist the host or start a tunnel for a half-failed @@ -762,39 +758,32 @@ async fn reinstall_skill() -> Result, String> { Ok(results) } -/// On-demand resync trigger for a single registered remote — wraps -/// the same patch-pin + kill-stale-mcp-stdio sequence that runs in -/// the background at every aiui-app startup. Surfaced as a per-remote -/// button in Settings so the user can re-invoke it without restarting -/// aiui (and see the StepResult log inline if a sweep fails). +/// On-demand resync trigger for a single registered remote — re-pins the +/// aiui-mcp version in the remote's `~/.claude.json`. Surfaced as a per-remote +/// button in Settings so the user can re-invoke the pin without restarting +/// aiui (and see the StepResult log inline). /// -/// Why this exists: 0.4.29's auto-resync on GUI-start is silent — if -/// the SSH-side `pkill` fails (remote temporarily unreachable) the -/// stale subprocess keeps running with the previous version. Without -/// a manual trigger, the user would have to close + reopen aiui-app -/// to retry. v0.4.34 adds the on-demand path. +/// Step 2 change: this used to also `pkill -f aiui-mcp` on the host to force +/// the new version onto a running session. That is gone — the kill crashed +/// live sessions mid-call (Claude Code does not respawn a disconnected MCP) +/// and had cross-session blast radius. The re-pin alone is the correct, +/// session-safe action: it takes effect at the next natural spawn while any +/// in-flight session finishes on its current version. If the user genuinely +/// wants a running remote session on the new version *now*, the cooperative +/// path is to end and restart that Claude Code session. #[tauri::command] async fn resync_remote( host_alias: String, ) -> Result, String> { let our_version = env!("CARGO_PKG_VERSION"); - // Re-pin in `~/.claude.json` on the remote (idempotent — if - // already pinned, no rewrite, returns AlreadyCurrent). - let (pin_step, patch) = setup::patch_claude_code_config_remote( + // Re-pin in `~/.claude.json` on the remote (idempotent — if already + // pinned, no rewrite, returns AlreadyCurrent). + let (pin_step, _patch) = setup::patch_claude_code_config_remote( &host_alias, None, our_version, ); - let mut results = vec![pin_step]; - // Sweep stale aiui-mcp children only when the pin actually - // changed (or unconditionally? — yes, unconditionally on - // user-triggered resync, because the user wouldn't click resync - // unless they suspect drift). On unconditional sweep: kills any - // running aiui-mcp regardless of pin state, which is what the - // user wants from a "force fresh" button. - let _ = patch; // not used here, but kept for tracing - results.push(setup::kill_remote_mcp_stdio(&host_alias)); - Ok(results) + Ok(vec![pin_step]) } #[tauri::command] @@ -1497,13 +1486,16 @@ pub fn run() { None => "unknown", } )); - if matches!(patch, Some(setup::RemoteConfigPatch::Patched)) { - let sweep = setup::kill_remote_mcp_stdio(&host_for_task); - logging::trace(&format!( - "remote-pin: {host_for_task}: sweep {}", - if sweep.ok { "ok" } else { "failed" } - )); - } + // Step 2 (Invariant: never kill a remote bridge to + // force a version): we used to `pkill -f aiui-mcp` + // here whenever the pin changed. That blunt sweep + // crashed live remote sessions mid-call (Claude Code + // does NOT respawn a disconnected MCP) and had + // cross-session blast radius — the remote twin of + // the 0.4.42 Cowork-kill. The pin alone is enough: + // it takes effect at the next *natural* spawn (next + // Claude Code session), while any live session keeps + // its current version until it ends on its own. } else { logging::trace(&format!( "remote-pin: {host_for_task} sync failed: {} ({})", diff --git a/companion/src-tauri/src/setup.rs b/companion/src-tauri/src/setup.rs index 70b3b0d..68dda0e 100644 --- a/companion/src-tauri/src/setup.rs +++ b/companion/src-tauri/src/setup.rs @@ -214,16 +214,6 @@ mod host_alias_tests { #[test] fn rejects_newline() { assert!(!is_valid_host_alias("a\nb")); } - // kill_remote_mcp_stdio is the one ssh site that previously had no - // boundary validator (Issue #52 follow-up). An unsafe alias must be - // refused *before* any ssh spawn — this case returns early without - // touching the network, so it is safe to assert in a unit test. - #[test] - fn kill_remote_mcp_stdio_refuses_option_injection() { - let r = super::kill_remote_mcp_stdio("-oProxyCommand=curl evil|sh"); - assert!(!r.ok); - assert!(r.message.contains("Refusing unsafe host alias")); - } } // Note: an earlier version of aiui patched ~/.ssh/config with a @@ -822,57 +812,15 @@ print("ok:patched") (step, patch) } -/// SIGTERM any `aiui-mcp` child still running on `host_alias` — used -/// after a pin update so Claude Desktop / Claude Code respawns the -/// child against the freshly pinned version on its next tool call. -/// Idempotent: succeeds silently when no matching process is running. -/// -/// `pkill -f` on macOS / Linux exit-codes: 0 = killed at least one, -/// 1 = nothing matched. Both are success from our perspective; only -/// the SSH layer or shell-not-found counts as a real failure. -pub fn kill_remote_mcp_stdio(host_alias: &str) -> StepResult { - // Defense-in-depth: like every other remote helper, refuse an unsafe - // alias before spawning ssh. The `--` below already keeps host_alias - // out of ssh option position, but this was the only ssh call site that - // relied on `--` alone — adding the validator restores the "every - // remote helper validates at its boundary" invariant (Issue #52 - // follow-up). host_alias here comes from remotes.json, so this is - // belt-and-suspenders rather than a live hole. - if !is_valid_host_alias(host_alias) { - return StepResult { - ok: false, - message: format!("Refusing unsafe host alias '{host_alias}'"), - details: None, - }; - } - let out = no_window( - std::process::Command::new("ssh").args([ - "-o", - "BatchMode=yes", - "--", - host_alias, - "pkill -f 'aiui-mcp' 2>/dev/null; true", - ]), - ) - .output(); - match out { - Err(e) => StepResult { - ok: false, - message: format!("ssh {host_alias} konnte nicht gestartet werden"), - details: Some(e.to_string()), - }, - Ok(o) if !o.status.success() => StepResult { - ok: false, - message: format!("Stale-mcp-stdio-Sweep auf {host_alias} fehlgeschlagen"), - details: Some(String::from_utf8_lossy(&o.stderr).to_string()), - }, - Ok(_) => StepResult { - ok: true, - message: format!("Stale aiui-mcp children on {host_alias} swept"), - details: None, - }, - } -} +// Step 2 removed `kill_remote_mcp_stdio` (an `ssh … pkill -f 'aiui-mcp'`). +// It existed to force a freshly-pinned version onto a running remote session, +// but `pkill -f` crashed live sessions mid-call (Claude Code does not respawn +// a disconnected MCP) and matched *every* aiui-mcp on the host — the remote +// twin of the 0.4.42 Cowork-kill, and outright unsafe once parallel sessions +// per remote are a requirement. The version pin in `~/.claude.json` now takes +// effect at the next natural spawn; live sessions finish on their current +// version. Deregistration (`remove_remote` / `uninstall_all`) relies on +// config-removal + natural session end, not a broad kill. /// Result of a successful reachability probe — carries the absolute /// uvx path discovered on the remote so subsequent setup steps can diff --git a/docs/architecture/stabilization-plan.md b/docs/architecture/stabilization-plan.md index d8f67c1..921a763 100644 --- a/docs/architecture/stabilization-plan.md +++ b/docs/architecture/stabilization-plan.md @@ -96,6 +96,20 @@ Files: `setup.rs`, `lib.rs`, `http.rs`, `python/.../server.py`. crashes. Tolerate ordinary version skew (the wire contract is versioned and stable). +> **Implemented (2026-05-30, PR #137).** `kill_remote_mcp_stdio` +> (`ssh … pkill -f 'aiui-mcp'`) and all three of its callers — the GUI-startup +> remote-pin loop, `add_remote` re-add, and `resync_remote` — are deleted. The +> pin in `~/.claude.json` now takes effect at the next natural spawn; a live +> session keeps its version until it ends. `resync_remote` is re-pin-only. +> Deregistration (`remove_remote` / `uninstall_all`) was already +> config-removal + tunnel-stop, no kill — left as is. Cooperative floor: +> `WIRE_VERSION = 1` in `http.rs`, surfaced on `/version` + `/probe`; the +> Python bridge's `_check_wire_compat` reads it once per process and raises a +> structured "restart this session" error only on a hard wire mismatch (absent +> field → treated as v1, transient read errors tolerated). Tests: Rust 105 +> green, Python 21 green (4 new wire-compat). The Rust bridge (`mcp.rs`) is the +> same binary as the companion, so it needs no floor check. + ## Step 3 — Async render + bridge parity (decided; closes the ReadError class) Files: `http.rs`, `dialog.rs`, `mcp.rs`, `python/.../server.py`. diff --git a/python/src/aiui_mcp/server.py b/python/src/aiui_mcp/server.py index da93fae..adfef8f 100644 --- a/python/src/aiui_mcp/server.py +++ b/python/src/aiui_mcp/server.py @@ -80,6 +80,16 @@ def _default_token_path() -> str: TIMEOUT_S = float(os.environ.get("AIUI_TIMEOUT_S", "120")) HEALTH_TIMEOUT_S = float(os.environ.get("AIUI_HEALTH_TIMEOUT_S", "3")) +# Cooperative version floor (Step 2). The wire contract between this bridge and +# the Mac companion is versioned independently of either side's release version. +# This bridge speaks wire v1; if the companion reports a *different* +# wire_version we surface a structured "restart this session" tool error rather +# than letting the Mac kill us to force a version. Ordinary app-version skew is +# tolerated — only an incompatible wire_version is fatal. Checked once per +# process (memoised in `_wire_checked`). +EXPECTED_WIRE_VERSION = 1 +_wire_checked = False + _INSTRUCTIONS = """\ aiui is connected — you can render native dialogs on the user's Mac \ instead of asking via chat. Default behaviour for this session: @@ -195,6 +205,50 @@ async def _preflight() -> None: f"aiui companion /health returned {r.status_code}: {r.text[:200]}" ) + # Cooperative version floor (Step 2): once per process, confirm the + # companion speaks a compatible wire version. Reuses this client. + await _check_wire_compat(client) + + +async def _check_wire_compat(client: httpx.AsyncClient) -> None: + """One-time wire-compatibility check against the companion's `/version`. + + Raises a structured ``RuntimeError`` (surfaced to the agent as a tool error) + on a hard wire-version mismatch, telling the user to restart this Claude + Code session so it respawns ``aiui-mcp`` at a matching version — the + cooperative replacement for the Mac externally killing this bridge. + + Tolerant by design: a companion too old to report ``wire_version`` (field + absent → treated as v1), or any transient error reading ``/version``, does + NOT block — we only hard-fail on an explicit, incompatible ``wire_version``. + Memoised via the module-level ``_wire_checked`` so it costs one extra GET + per process, on the first render only. + """ + global _wire_checked + if _wire_checked: + return + try: + r = await client.get( + f"{ENDPOINT}/version", + headers={"Authorization": f"Bearer {_token()}"}, + ) + r.raise_for_status() + remote_wire = int(r.json().get("wire_version", EXPECTED_WIRE_VERSION)) + except Exception as e: # noqa: BLE001 — tolerate skew; never block on a read error + log.debug("wire-compat check skipped (could not read /version): %s", _explain_exc(e)) + _wire_checked = True + return + if remote_wire != EXPECTED_WIRE_VERSION: + # Do NOT memoise a failure — leave it un-set so a subsequent call + # (e.g. after the user restarts the companion) re-checks cleanly. + raise RuntimeError( + f"incompatible aiui versions — this bridge (aiui-mcp {VERSION}) speaks wire " + f"v{EXPECTED_WIRE_VERSION}, but the companion on your Mac speaks wire " + f"v{remote_wire}. Restart this Claude Code session so it respawns aiui-mcp at " + f"a matching version (or update the side that is behind)." + ) + _wire_checked = True + _SRC_KEYS = {"src", "thumbnail"} _MAX_IMAGE_BYTES = 10 * 1024 * 1024 # 10 MB — mirrors the Rust resolver diff --git a/python/tests/test_wire_compat.py b/python/tests/test_wire_compat.py new file mode 100644 index 0000000..68af19f --- /dev/null +++ b/python/tests/test_wire_compat.py @@ -0,0 +1,104 @@ +"""Tests for the cooperative version floor (Step 2). + +The Mac companion no longer kills this bridge to force a version. Instead both +sides carry a `wire_version`; on a hard mismatch the bridge surfaces a +structured "restart this session" tool error and otherwise tolerates ordinary +app-version skew. These cover `_check_wire_compat`. +""" +from __future__ import annotations + +import asyncio +from typing import Any + +import httpx +import pytest + +import aiui_mcp.server as server +from aiui_mcp.server import EXPECTED_WIRE_VERSION, _check_wire_compat + + +class _FakeResp: + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + def raise_for_status(self) -> None: # all fakes are 200 + return None + + def json(self) -> dict[str, Any]: + return self._payload + + +def _setup_token(monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + token_file = tmp_path / "token" + token_file.write_text("dummy-token-for-tests") + monkeypatch.setattr(server, "TOKEN_PATH", token_file) + + +def _run_check() -> None: + async def run() -> None: + async with httpx.AsyncClient() as client: + await _check_wire_compat(client) + + asyncio.run(run()) + + +def test_matching_wire_version_passes_and_memoises( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + monkeypatch.setattr(server, "_wire_checked", False) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + return _FakeResp({"wire_version": EXPECTED_WIRE_VERSION}) + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + _run_check() # must not raise + assert server._wire_checked is True + + +def test_mismatched_wire_version_raises_structured_error_and_does_not_memoise( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + monkeypatch.setattr(server, "_wire_checked", False) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + return _FakeResp({"wire_version": EXPECTED_WIRE_VERSION + 998}) + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + with pytest.raises(RuntimeError) as exc_info: + _run_check() + assert "incompatible aiui versions" in str(exc_info.value) + # A mismatch must NOT be memoised — a later restart of the companion should + # be able to clear it without restarting the bridge process. + assert server._wire_checked is False + + +def test_missing_wire_version_field_is_tolerated( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + """An older companion without the field is treated as wire v1 → compatible.""" + _setup_token(monkeypatch, tmp_path) + monkeypatch.setattr(server, "_wire_checked", False) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + return _FakeResp({"version": "0.4.46"}) # no wire_version + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + _run_check() # must not raise + assert server._wire_checked is True + + +def test_read_error_is_tolerated_not_fatal( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + """A transient /version read failure must not block rendering.""" + _setup_token(monkeypatch, tmp_path) + monkeypatch.setattr(server, "_wire_checked", False) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + raise httpx.ConnectError("companion down") + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + _run_check() # must not raise — tolerate skew/transient + assert server._wire_checked is True From 8b1c61dee38e2cac9b3757f1b60e41b765ce2acf Mon Sep 17 00:00:00 2001 From: iret77 <63622643+iret77@users.noreply.github.com> Date: Sat, 30 May 2026 16:44:39 +0200 Subject: [PATCH 04/27] =?UTF-8?q?feat:=20Step=203=20=E2=80=94=20async=20/r?= =?UTF-8?q?ender=20+=20bridge=20parity=20(additive,=20backward-compatible)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the remote ReadError class without disturbing the proven synchronous path. Async render is opt-in via the `x-aiui-async` header — not a replacement — so old bridges keep working and WIRE_VERSION stays 1. Companion (http.rs): - POST /render + header → registers, surfaces the dialog, hands resolution to a detached task filling an AsyncSlot, returns 202 {id, ttl_secs}. - New GET /render/{id}: poll-loop (200ms ticks, bounded by ASYNC_POLL_WINDOW = 25s) → terminal result (drained once) / {pending:true} / 404. - Without the header the legacy synchronous long-poll runs unchanged. resolve_dialog shares resolution + window teardown across both paths; resolved-but-uncollected slots swept at DIALOG_TTL. Both bridges (mcp.rs, server.py): POST with header, then loop GET until terminal; each GET bounded (40s > server window) so a tunnel/GUI blip costs one poll, never a multi-minute held connection. Both fall back to the sync result if the companion answers 200 not 202 (new bridge ↔ old companion). Python parity (I6): _wait_for_aiui (/ping cold-start poll ~30s, mirrors the Rust bridge), MCP progress notifications per pending poll (FastMCP Context.report_progress, best-effort), the async polling loop, and an explicit httpx.ReadError branch ("tunnel up, Mac not serving") distinct from ConnectError. The Rust bridge already had cold-start + progress. Tests: cargo 106 (+1 slot lifecycle), clippy -D warnings clean, python 26 (+5). Not yet exercised end-to-end against a live remote — integration harness remains the open cross-cutting item. Refs #137 Co-Authored-By: Claude Opus 4.8 (1M context) --- companion/src-tauri/src/http.rs | 315 +++++++++++++++++++----- companion/src-tauri/src/mcp.rs | 75 +++++- docs/architecture/stabilization-plan.md | 27 ++ python/src/aiui_mcp/server.py | 122 ++++++++- python/tests/test_async_render.py | 139 +++++++++++ 5 files changed, 600 insertions(+), 78 deletions(-) create mode 100644 python/tests/test_async_render.py diff --git a/companion/src-tauri/src/http.rs b/companion/src-tauri/src/http.rs index f30f978..fec3253 100644 --- a/companion/src-tauri/src/http.rs +++ b/companion/src-tauri/src/http.rs @@ -3,7 +3,7 @@ use crate::config::AppConfig; use crate::dialog::{DialogRequest, DialogState, DIALOG_TTL}; use crate::lifetime::LifetimeStats; use axum::{ - extract::State, + extract::{Path, State}, http::{HeaderMap, StatusCode}, response::IntoResponse, routing::{get, post}, @@ -40,6 +40,35 @@ const IDLE_RESTART_UPTIME: Duration = Duration::from_secs(24 * 60 * 60); /// Prevents reloading mid-burst when many renders fire close together. const IDLE_RESTART_QUIET: Duration = Duration::from_secs(10 * 60); +/// Header a bridge sets to opt into async `/render` (Step 3). Present → +/// `POST /render` registers + surfaces the dialog, returns `{id, ttl}` +/// immediately (202), and the caller polls `GET /render/{id}`. Absent → the +/// legacy synchronous long-poll (POST holds the connection until the user +/// answers). Backward-compatible: old bridges that don't set it keep working +/// unchanged, so the wire contract stays v1. +const ASYNC_RENDER_HEADER: &str = "x-aiui-async"; + +/// How long a single `GET /render/{id}` long-poll parks before returning +/// `{pending:true}` so the caller can re-poll (and emit a progress +/// notification). Short enough to stay well under any client read timeout, so +/// a tunnel/GUI blip can only ever cost one poll window, never a multi-minute +/// held connection (the remote ReadError class this closes). +const ASYNC_POLL_WINDOW: Duration = Duration::from_secs(25); + +/// Buffered terminal result for an async render, keyed by dialog id. The +/// `POST /render` async branch spawns a task that awaits the user's answer and +/// fills this; `GET /render/{id}` drains it. Decouples the dialog's lifetime +/// from any single HTTP connection. +struct AsyncSlot { + /// `Some` once the dialog reached a terminal outcome; drained by the first + /// successful GET. A `GET /render/{id}` poll-loops (cheap 200 ms ticks, + /// bounded by `ASYNC_POLL_WINDOW`) reading this — no cross-task notifier to + /// reason about, and a missed tick costs at most 200 ms, never correctness. + result: Option, + /// For the opportunistic sweep of resolved-but-never-collected slots. + created_at: Instant, +} + #[derive(Clone)] struct AppState { cfg: Arc, @@ -54,6 +83,9 @@ struct AppState { /// Mutex is fine here — contention is bounded by the rate of /// /render calls. last_render_at: Arc>, + /// Buffered terminal results for async renders (Step 3), keyed by dialog + /// id. Empty in the all-synchronous case. + async_slots: Arc>>, } #[derive(Deserialize)] @@ -159,11 +191,13 @@ pub async fn serve( app, started_at: now, last_render_at: Arc::new(Mutex::new(now)), + async_slots: Arc::new(Mutex::new(std::collections::HashMap::new())), }; let router = Router::new() .route("/health", get(health)) .route("/render", post(render)) + .route("/render/{id}", get(render_poll)) .route("/version", get(version)) .route("/update", post(update)) .route("/ping", get(ping)) @@ -592,6 +626,148 @@ impl Drop for RenderGuard { } } +/// Await a registered dialog's terminal outcome (bounded by `DIALOG_TTL`), +/// then tear its window down. Shared by the synchronous POST path (awaited +/// inline) and the async path (run in a detached task that fills the +/// `AsyncSlot`). Factoring it out keeps the two paths byte-for-byte identical +/// in resolution + teardown semantics. +async fn resolve_dialog( + state: AppState, + id: String, + result_rx: tokio::sync::oneshot::Receiver, +) -> crate::dialog::DialogResult { + trace(&format!("render: awaiting user response id={}", id)); + let result = match tokio::time::timeout(DIALOG_TTL, result_rx).await { + Ok(Ok(r)) => r, + Ok(Err(_)) => crate::dialog::DialogResult { + id: id.clone(), + cancelled: true, + result: serde_json::Value::Null, + reason: Some("channel_dropped".into()), + }, + Err(_) => { + // TTL expired without user response. Cancel the registry entry + // (frees its slot) and produce the same 200-OK cancelled shape a + // user-driven cancel produces — only `reason` differs (#36, the + // v0.4.45 Bug #5 fix: never surface this as a transport error). + trace(&format!("render: TTL expired id={}", id)); + state.dialog.cancel(&id); + crate::dialog::DialogResult { + id: id.clone(), + cancelled: true, + result: serde_json::Value::Null, + reason: Some("ttl_expired".into()), + } + } + }; + trace(&format!( + "render: got response id={} cancelled={}", + result.id, result.cancelled + )); + // Authoritative window teardown (v0.4.46, Bug B): single point that + // guarantees a dialog window never outlives its dialog. Idempotent — + // a no-op on the submit/cancel paths where the window is already gone. + let app_for_destroy = state.app.clone(); + let _ = state + .app + .run_on_main_thread(move || crate::destroy_dialog_window(&app_for_destroy)); + result +} + +/// Drop async-render result slots older than `DIALOG_TTL` — covers the case +/// where a caller posts an async render, the dialog resolves, but the caller +/// never collects the result via GET (process died after POST). Called +/// opportunistically on each new async render; no background reaper. +fn sweep_async_slots(state: &AppState) { + let now = Instant::now(); + state + .async_slots + .lock() + .unwrap() + .retain(|_, s| now.duration_since(s.created_at) <= DIALOG_TTL); +} + +/// Outcome of looking up an async-render slot by id. +enum SlotLook { + /// Resolved — the terminal result (already removed from the map). + Ready(crate::dialog::DialogResult), + /// Registered but not yet resolved. + Pending, + /// No such id — never an async render, or already collected. + Gone, +} + +/// Drain an async-render slot: if resolved, take its result and remove the slot +/// (`Ready`); if still in flight, `Pending`; if absent, `Gone`. Pure over the +/// map so the `/render/{id}` branching is unit-testable without a Tauri app. +fn drain_async_slot( + slots: &mut std::collections::HashMap, + id: &str, +) -> SlotLook { + let taken = match slots.get_mut(id) { + Some(slot) => slot.result.take(), + None => return SlotLook::Gone, + }; + match taken { + Some(result) => { + slots.remove(id); + SlotLook::Ready(result) + } + None => SlotLook::Pending, + } +} + +/// GET `/render/{id}` — bounded long-poll for an async render's result (Step +/// 3). Returns the terminal `{id, cancelled, result, reason}` once available +/// (and drains the slot), `{pending: true}` after one `ASYNC_POLL_WINDOW` so +/// the caller re-polls, or 404 for an unknown id (never an async render, or +/// already collected). The caller loops GET until terminal or it gives up. +async fn render_poll( + State(state): State, + headers: HeaderMap, + Path(id): Path, +) -> impl IntoResponse { + if !auth_ok(&headers, &state.cfg.token) { + return ( + StatusCode::UNAUTHORIZED, + Json(serde_json::json!({"error": "unauthorized"})), + ) + .into_response(); + } + + let deadline = Instant::now() + ASYNC_POLL_WINDOW; + loop { + let look = drain_async_slot(&mut state.async_slots.lock().unwrap(), &id); + match look { + SlotLook::Ready(result) => { + trace(&format!("render_poll: delivered id={}", id)); + return Json(RenderResponse { + id: result.id, + cancelled: result.cancelled, + result: result.result, + reason: result.reason, + }) + .into_response(); + } + SlotLook::Gone => { + trace(&format!("render_poll: unknown id={}", id)); + return ( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"error": "unknown_render_id", "id": id})), + ) + .into_response(); + } + SlotLook::Pending => { + if Instant::now() >= deadline { + return Json(serde_json::json!({"pending": true, "id": id})) + .into_response(); + } + tokio::time::sleep(Duration::from_millis(200)).await; + } + } + } +} + async fn render( State(state): State, headers: HeaderMap, @@ -826,66 +1002,44 @@ async fn render( } } - // ── Normal path ───────────────────────────────────────────────────── - // Wait for the user's submit/cancel — but bounded by `DIALOG_TTL`. A - // dialog that nobody answers eventually returns a structured timeout - // instead of blocking the caller indefinitely (#36). The same TTL is - // used by the registry's opportunistic sweep, so a timed-out entry - // gets cancelled regardless of whether this awaiter or the next - // `register()` call notices first. - trace(&format!("render: awaiting user response id={}", id)); - let result = match tokio::time::timeout(DIALOG_TTL, result_rx).await { - Ok(Ok(r)) => r, - Ok(Err(_)) => crate::dialog::DialogResult { - id: id.clone(), - cancelled: true, - result: serde_json::Value::Null, - reason: Some("channel_dropped".into()), - }, - Err(_) => { - // TTL expired without user response. Cancel the registry - // entry (frees its slot) and fall through to the normal - // 200-OK response below with cancelled:true + reason. - // - // v0.4.45 (Bug #5): previously this returned HTTP 408, which - // mcp.rs's render_dialog treated as a non-success status → - // generic "aiui tool error: render http 408" — a different - // shape than a user-driven cancel (200 {cancelled:true}). - // The agent then saw a transport error instead of a clean - // "user didn't respond" cancellation. Now both the - // user-cancel and the TTL-expiry paths produce the exact - // same tool-result shape; only `reason` differs. - trace(&format!("render: TTL expired id={}", id)); - state.dialog.cancel(&id); - crate::dialog::DialogResult { - id: id.clone(), - cancelled: true, - result: serde_json::Value::Null, - reason: Some("ttl_expired".into()), - } + // ── Async branch (Step 3) ─────────────────────────────────────────── + // If the caller opted in (header `x-aiui-async`), hand the dialog off to a + // detached task and answer immediately with `{id, ttl_secs}` (202). The + // caller polls `GET /render/{id}`. This removes the multi-minute open HTTP + // connection that a tunnel/GUI blip turns into a remote ReadError — + // resolution now lives in a task, not on the wire. + if headers.contains_key(ASYNC_RENDER_HEADER) { + // The detached task owns resolution + window teardown from here. + guard.disarm(); + sweep_async_slots(&state); + { + let mut slots = state.async_slots.lock().unwrap(); + slots.insert( + id.clone(), + AsyncSlot { result: None, created_at: Instant::now() }, + ); } - }; - trace(&format!( - "render: got response id={} cancelled={}", - result.id, result.cancelled - )); - - // Authoritative teardown (v0.4.46, Bug B): the render has reached a - // terminal outcome — user submit/cancel, native X-close, TTL expiry, - // or channel-drop. Destroy the dialog window now, from Rust, on the - // main thread. This is the single point that guarantees a dialog - // window never outlives its dialog: it covers the TTL/channel-drop - // paths the frontend's own close never reaches (the empty-window - // stranding of 2026-05-29), and is a harmless no-op on the - // submit/cancel paths where the window is already gone. - { - let app_for_destroy = state.app.clone(); - let _ = state - .app - .run_on_main_thread(move || crate::destroy_dialog_window(&app_for_destroy)); + let task_state = state.clone(); + let task_id = id.clone(); + tokio::spawn(async move { + let result = resolve_dialog(task_state.clone(), task_id.clone(), result_rx).await; + if let Some(slot) = task_state.async_slots.lock().unwrap().get_mut(&task_id) { + slot.result = Some(result); + } + }); + trace(&format!("render: async accepted id={}", id)); + return ( + StatusCode::ACCEPTED, + Json(serde_json::json!({ "id": id, "ttl_secs": DIALOG_TTL.as_secs() })), + ) + .into_response(); } - // Terminal teardown done explicitly above — stand the guard down so it - // doesn't redundantly re-cancel/re-destroy on scope exit. + + // ── Synchronous path (legacy, backward-compatible) ────────────────── + // No opt-in header → hold the connection until the user answers, exactly + // as before. The guard stays armed across the inline await so a dropped + // connection still cleans up; `resolve_dialog` runs the terminal teardown. + let result = resolve_dialog(state.clone(), id.clone(), result_rx).await; guard.disarm(); // Lifecycle-driven update check (#42): fire once after every @@ -1107,3 +1261,46 @@ mod render_guard_tests { assert_eq!(ds.stats().orphan_count, 1); } } + +#[cfg(test)] +mod async_render_tests { + use super::{drain_async_slot, AsyncSlot, SlotLook}; + use std::collections::HashMap; + use std::time::Instant; + + // Step 3: the GET /render/{id} branching — pending → ready (drained once) + // → gone — without a Tauri app. + #[test] + fn slot_lifecycle_pending_ready_gone() { + let mut slots: HashMap = HashMap::new(); + slots.insert( + "x".into(), + AsyncSlot { result: None, created_at: Instant::now() }, + ); + + // Registered, not resolved → Pending. + assert!(matches!(drain_async_slot(&mut slots, "x"), SlotLook::Pending)); + // Unknown id → Gone. + assert!(matches!(drain_async_slot(&mut slots, "nope"), SlotLook::Gone)); + + // Resolve it. + slots.get_mut("x").unwrap().result = Some(crate::dialog::DialogResult { + id: "x".into(), + cancelled: true, + result: serde_json::Value::Null, + reason: Some("window_closed".into()), + }); + + // First drain delivers the terminal result. + match drain_async_slot(&mut slots, "x") { + SlotLook::Ready(r) => { + assert!(r.cancelled); + assert_eq!(r.reason.as_deref(), Some("window_closed")); + } + _ => panic!("expected Ready"), + } + // Slot was removed → a second drain is Gone (no double-delivery). + assert!(matches!(drain_async_slot(&mut slots, "x"), SlotLook::Gone)); + assert!(slots.is_empty()); + } +} diff --git a/companion/src-tauri/src/mcp.rs b/companion/src-tauri/src/mcp.rs index f7696b3..0d52451 100644 --- a/companion/src-tauri/src/mcp.rs +++ b/companion/src-tauri/src/mcp.rs @@ -655,20 +655,23 @@ async fn render_dialog( let mut spec = spec; crate::imageresolve::resolve_local_paths(&mut spec); let body = json!({ "spec": spec }); - // POST /render is long-poll: the GUI holds the response open - // until the user clicks submit/cancel or the companion-side - // `DIALOG_TTL` sweep fires (currently 2 h). Override the shared - // reqwest client's 300-s default per-call so the user has the - // full TTL to fill out the form. We add 60 s slack on top so a - // backend-side TTL cancel still reaches us cleanly before our - // own timeout. v0.4.41. + // Async render (Step 3): POST opts in via `x-aiui-async`; the companion + // registers + surfaces the dialog and returns immediately with + // `{id, ttl_secs}` (202). We then poll `GET /render/{id}` in bounded + // windows until the terminal result. No single connection is held for the + // user's think-time, so a tunnel/GUI blip can cost at most one poll + // window — never a multi-minute ReadError. The POST itself only covers + // registration + the ack handshake, so a short timeout suffices. + // + // Backward-compatible: an older companion ignores the unknown header and + // answers synchronously (200 with the terminal `{cancelled, …}` shape) — + // detected after the status checks below and used directly, no polling. let resp = http .post(&url) .bearer_auth(&token) + .header("x-aiui-async", "1") .json(&body) - .timeout(std::time::Duration::from_secs( - crate::dialog::DIALOG_TTL.as_secs() + 60, - )) + .timeout(std::time::Duration::from_secs(30)) .send() .await .map_err(|e| RenderError::Transport(format!("POST /render: {e}")))?; @@ -711,9 +714,57 @@ async fn render_dialog( resp.status() ))); } - resp.json::() + let accepted = resp.status() == reqwest::StatusCode::ACCEPTED; + let first = resp + .json::() .await - .map_err(|e| RenderError::Transport(format!("parse /render: {e}"))) + .map_err(|e| RenderError::Transport(format!("parse /render: {e}")))?; + if !accepted { + // Synchronous companion (old): `first` is already the terminal result. + return Ok(first); + } + // Async companion: poll `GET /render/{id}` until terminal. Each GET is + // bounded (40 s > the server's ~25 s poll window) so the server always + // answers `{pending:true}` before we time out, and we re-poll. The loop + // ends on the terminal result, a 404 (id expired / never registered), or + // the server-side TTL turning into a terminal `cancelled` result. + let id = match first.get("id").and_then(|v| v.as_str()) { + Some(s) => s.to_string(), + None => { + return Err(RenderError::Transport( + "async /render: 202 response missing `id`".into(), + )) + } + }; + let poll_url = format!("{}/render/{}", base_url(cfg), id); + loop { + let pr = http + .get(&poll_url) + .bearer_auth(&token) + .timeout(std::time::Duration::from_secs(40)) + .send() + .await + .map_err(|e| RenderError::Transport(format!("GET /render/{id}: {e}")))?; + if pr.status() == reqwest::StatusCode::NOT_FOUND { + return Err(RenderError::Transport(format!( + "aiui lost track of render {id} (expired or never registered)" + ))); + } + if !pr.status().is_success() { + return Err(RenderError::Transport(format!( + "render poll http {}", + pr.status() + ))); + } + let pv = pr + .json::() + .await + .map_err(|e| RenderError::Transport(format!("parse /render/{id}: {e}")))?; + if pv.get("pending").and_then(|v| v.as_bool()) == Some(true) { + continue; + } + return Ok(pv); + } } /// Tool-call response signaling that the companion is alive but diff --git a/docs/architecture/stabilization-plan.md b/docs/architecture/stabilization-plan.md index 921a763..cfcb3f8 100644 --- a/docs/architecture/stabilization-plan.md +++ b/docs/architecture/stabilization-plan.md @@ -143,6 +143,33 @@ Files: `http.rs`, `dialog.rs`, `mcp.rs`, `python/.../server.py`. HTTP (= ReadError: tunnel up, Mac down) vs. 401 (token). Today the remote `ConnectError` branch is dead and ReadError gets a misleading message. +> **Implemented (2026-05-30, PR #137) — additive / backward-compatible.** Async +> render is opt-in via the `x-aiui-async` request header, *not* a replacement of +> the synchronous long-poll. This keeps the proven local path intact and means +> old bridges (which never send the header) keep working unchanged, so +> `WIRE_VERSION` stays 1. +> - **Companion (`http.rs`):** `POST /render` with the header registers + +> surfaces the dialog, hands resolution to a detached task that fills an +> `AsyncSlot`, and returns `202 {id, ttl_secs}`. New `GET /render/{id}` +> poll-loops (200 ms ticks, bounded by `ASYNC_POLL_WINDOW` = 25 s) returning +> the terminal result (drained once) / `{pending:true}` / `404`. Without the +> header, the legacy synchronous path runs untouched. Resolution + window +> teardown are shared by both via `resolve_dialog`. Resolved-but-uncollected +> slots are swept at `DIALOG_TTL`. +> - **Both bridges (`mcp.rs`, `server.py`):** POST with the header, then loop +> `GET /render/{id}` until terminal; each GET is bounded (40 s > server +> window) so a blip costs one poll, never a held connection. Both fall back to +> the synchronous result if the companion answers 200 instead of 202 (so a new +> bridge works against an old companion too). +> - **Python parity (I6):** added `_wait_for_aiui` (`/ping` cold-start poll, +> ~30 s), MCP progress notifications each pending poll (FastMCP +> `Context.report_progress`, best-effort), the async polling loop, and an +> explicit `httpx.ReadError` branch ("tunnel up, Mac not serving") distinct +> from `ConnectError`. The Rust bridge already had cold-start + progress. +> - Tests: Rust 106 (slot lifecycle), Python 26 (+5: poll terminal/pending/404, +> progress tick, cold-start tolerance). Not yet exercised end-to-end against a +> live remote — integration harness is still the open cross-cutting item. + ## Step 4 — Tunnel mechanism + multi-window (one open decision) Files: `tunnel.rs`, `setup.rs`, `lib.rs`, `dialog.rs`, `http.rs`, frontend. diff --git a/python/src/aiui_mcp/server.py b/python/src/aiui_mcp/server.py index adfef8f..c62129b 100644 --- a/python/src/aiui_mcp/server.py +++ b/python/src/aiui_mcp/server.py @@ -16,6 +16,7 @@ """ from __future__ import annotations +import asyncio import base64 import importlib.metadata import importlib.resources as resources @@ -23,12 +24,13 @@ import mimetypes import os import sys +import time from datetime import datetime, timezone from pathlib import Path from typing import Any import httpx -from mcp.server.fastmcp import FastMCP +from mcp.server.fastmcp import Context, FastMCP def _version() -> str: @@ -90,6 +92,18 @@ def _default_token_path() -> str: EXPECTED_WIRE_VERSION = 1 _wire_checked = False +# Cold-start poll (Step 3, bridge parity with the Rust bridge's wait_for_aiui). +# Before the first render call we poll the unauthenticated /ping until the +# companion answers or this budget elapses — so a freshly-launched Claude +# Desktop / just-up SSH tunnel gets time to start serving instead of failing +# the first call. Replaces the brittle single 3 s /health preflight. +COLDSTART_WAIT_S = float(os.environ.get("AIUI_COLDSTART_WAIT_S", "30")) + +# Per-GET timeout for the async-render poll. Must exceed the companion's +# ~25 s server-side poll window so the server always answers `{pending:true}` +# before we time out, letting us re-poll cleanly. +ASYNC_POLL_TIMEOUT_S = 40.0 + _INSTRUCTIONS = """\ aiui is connected — you can render native dialogs on the user's Mac \ instead of asking via chat. Default behaviour for this session: @@ -162,6 +176,20 @@ async def _preflight() -> None: f"local aiui instance holding the port. Run `pkill -f '^aiui$'` on " f"this host. ({_explain_exc(e)})" ) from e + except httpx.ReadError as e: + # Connected at the TCP layer but the stream closed with no HTTP + # response — the classic remote signature of "tunnel is up but the + # Mac side isn't serving" (stale SSH reverse-forward bound to :7777 + # with a dead aiui behind it). Distinct from ConnectError (nothing + # listening) and from a clean 401/5xx. + raise RuntimeError( + f"aiui companion at {ENDPOINT} accepted the connection but sent no " + f"response (ReadError). On a remote this means the SSH reverse-tunnel " + f"is up but the Mac-side aiui isn't serving — Claude Desktop may be " + f"closed, or a stale tunnel is squatting :7777. Open Claude Desktop on " + f"the Mac; if it persists, re-register this remote in aiui.app settings. " + f"({_explain_exc(e)})" + ) from e except httpx.RemoteProtocolError as e: # Connection reset / closed mid-response. The on-Mac mcp-stdio # child's auto-resurrect normally brings aiui.app back on the @@ -322,7 +350,72 @@ def _resolve_local_paths(node: Any) -> None: _resolve_local_paths(item) -async def _post_render(spec: dict[str, Any]) -> dict[str, Any]: +async def _wait_for_aiui() -> None: + """Poll the unauthenticated `/ping` until the companion answers or + `COLDSTART_WAIT_S` elapses (Step 3, parity with the Rust bridge). + + Gives a cold companion (Claude Desktop just launched, SSH tunnel just came + up) time to start serving before the first render, instead of failing the + call outright. Tolerant: on timeout we simply fall through to `_preflight`, + which produces the precise reachability diagnosis. `/ping` is cheap and + needs no token, so this is a light readiness gate, not a full health check. + """ + deadline = time.monotonic() + COLDSTART_WAIT_S + async with httpx.AsyncClient(timeout=2.0) as client: + while time.monotonic() < deadline: + try: + r = await client.get(f"{ENDPOINT}/ping") + if r.status_code == 200: + return + except httpx.HTTPError: + pass # not up yet — keep polling within the budget + await asyncio.sleep(0.5) + + +async def _poll_render( + client: httpx.AsyncClient, + render_id: str, + ctx: Context | None, +) -> dict[str, Any]: + """Poll `GET /render/{id}` until the terminal result (Step 3 async render). + + Each GET is bounded by `ASYNC_POLL_TIMEOUT_S` (> the server's ~25 s poll + window), so the server always answers `{pending:true}` before we time out + and we re-poll — no single connection is held for the user's think-time, + which is what immunises the remote path against the multi-minute-ReadError + class. Emits an MCP progress notification each pending iteration so the + client (Claude Code) knows the tool is alive, not hung. + """ + poll_url = f"{ENDPOINT}/render/{render_id}" + iteration = 0 + while True: + pr = await client.get( + poll_url, + headers={"Authorization": f"Bearer {_token()}"}, + timeout=ASYNC_POLL_TIMEOUT_S, + ) + if pr.status_code == 404: + raise RuntimeError( + f"aiui lost track of render {render_id} (expired or never " + f"registered). Restart the dialog." + ) + pr.raise_for_status() + pv = pr.json() + if pv.get("pending") is True: + iteration += 1 + if ctx is not None: + # Best-effort: a missing progressToken or any reporting hiccup + # must never break the render. + try: + await ctx.report_progress(progress=float(iteration), total=None) + except Exception as e: # noqa: BLE001 + log.debug("progress report skipped: %s", _explain_exc(e)) + continue + return pv + + +async def _post_render(spec: dict[str, Any], ctx: Context | None = None) -> dict[str, Any]: + await _wait_for_aiui() await _preflight() t0 = datetime.now(timezone.utc) log.info("render → kind=%s", spec.get("kind")) @@ -334,14 +427,26 @@ async def _post_render(spec: dict[str, Any]) -> dict[str, Any]: # only handles HTTPS. _resolve_local_paths(spec) async with httpx.AsyncClient(timeout=TIMEOUT_S) as client: + # Async render (Step 3): opt in via the header. A current companion + # registers the dialog and answers immediately with `{id, ttl_secs}` + # (202); we then poll for the result. An older companion ignores the + # header and answers synchronously (200 with the terminal shape) — we + # detect that and use it directly (backward-compatible). r = await client.post( f"{ENDPOINT}/render", - headers={"Authorization": f"Bearer {_token()}"}, + headers={"Authorization": f"Bearer {_token()}", "x-aiui-async": "1"}, json={"spec": spec}, ) r.raise_for_status() + first = r.json() + if r.status_code == 202: + render_id = first.get("id") + if not render_id: + raise RuntimeError("async /render: 202 response missing `id`") + data = await _poll_render(client, render_id, ctx) + else: + data = first # synchronous companion — terminal result already dt = (datetime.now(timezone.utc) - t0).total_seconds() - data = r.json() log.info( "render ← kind=%s cancelled=%s took=%.2fs", spec.get("kind"), data.get("cancelled"), dt, @@ -362,6 +467,7 @@ async def ask( header: str | None = None, multi_select: bool = False, allow_other: bool = True, + ctx: Context | None = None, ) -> dict[str, Any]: """Before listing options in chat and waiting for the user to type back which one (deploy strategy, migration path, file to act on …), call @@ -401,7 +507,7 @@ async def ask( "multiSelect": multi_select, "allowOther": allow_other, } - return _format_result(await _post_render(spec)) + return _format_result(await _post_render(spec, ctx)) @mcp.tool() @@ -414,6 +520,7 @@ async def form( actions: list[dict[str, Any]] | None = None, submit_label: str | None = None, cancel_label: str | None = None, + ctx: Context | None = None, ) -> dict[str, Any]: """Whenever the user needs to provide ≥ 2 related inputs, or any single input that doesn't belong in chat (secret, date/datetime/range, @@ -506,7 +613,7 @@ async def form( "submitLabel": submit_label, "cancelLabel": cancel_label, } - return _format_result(await _post_render(spec)) + return _format_result(await _post_render(spec, ctx)) @mcp.tool() @@ -518,6 +625,7 @@ async def confirm( confirm_label: str | None = None, cancel_label: str | None = None, image: dict[str, Any] | None = None, + ctx: Context | None = None, ) -> dict[str, Any]: """Before writing any yes/no question into chat, call this tool instead. Pass `destructive=True` (red button) for delete / drop / force-push / @@ -561,7 +669,7 @@ async def confirm( "cancelLabel": cancel_label, "image": image, } - return _format_result(await _post_render(spec)) + return _format_result(await _post_render(spec, ctx)) @mcp.prompt(name="teach") diff --git a/python/tests/test_async_render.py b/python/tests/test_async_render.py new file mode 100644 index 0000000..e818028 --- /dev/null +++ b/python/tests/test_async_render.py @@ -0,0 +1,139 @@ +"""Tests for the async-render client path (Step 3): the bridge POSTs, then +polls `GET /render/{id}` until a terminal result, emitting progress on the way. +""" +from __future__ import annotations + +import asyncio +from typing import Any + +import httpx +import pytest + +import aiui_mcp.server as server +from aiui_mcp.server import _poll_render, _wait_for_aiui + + +class _FakeResp: + def __init__(self, payload: dict[str, Any], status: int = 200) -> None: + self._payload = payload + self.status_code = status + + def raise_for_status(self) -> None: + return None # no test drives the >=400 path through here + + def json(self) -> dict[str, Any]: + return self._payload + + +def _setup_token(monkeypatch: pytest.MonkeyPatch, tmp_path: Any) -> None: + token_file = tmp_path / "token" + token_file.write_text("dummy-token-for-tests") + monkeypatch.setattr(server, "TOKEN_PATH", token_file) + + +def test_poll_render_returns_terminal_after_pending( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + seq = [ + _FakeResp({"pending": True}), + _FakeResp({"pending": True}), + _FakeResp({"id": "x", "cancelled": False, "result": {"confirmed": True}}), + ] + calls = {"n": 0} + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + i = min(calls["n"], len(seq) - 1) + calls["n"] += 1 + return seq[i] + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + + async def run() -> dict[str, Any]: + async with httpx.AsyncClient() as client: + return await _poll_render(client, "x", None) + + data = asyncio.run(run()) + assert data["cancelled"] is False + assert data["result"]["confirmed"] is True + assert calls["n"] == 3 # two pending polls, then the terminal one + + +def test_poll_render_reports_progress_each_pending_iteration( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + seq = [_FakeResp({"pending": True}), _FakeResp({"id": "x", "cancelled": True})] + calls = {"n": 0} + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + i = min(calls["n"], len(seq) - 1) + calls["n"] += 1 + return seq[i] + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + + class _Ctx: + def __init__(self) -> None: + self.ticks: list[float] = [] + + async def report_progress( + self, progress: float, total: Any = None, message: Any = None + ) -> None: + self.ticks.append(progress) + + ctx = _Ctx() + + async def run() -> dict[str, Any]: + async with httpx.AsyncClient() as client: + return await _poll_render(client, "x", ctx) # type: ignore[arg-type] + + data = asyncio.run(run()) + assert data["cancelled"] is True + assert ctx.ticks == [1.0] # one pending iteration → one progress tick + + +def test_poll_render_raises_on_404( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + return _FakeResp({"error": "unknown_render_id"}, status=404) + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + + async def run() -> dict[str, Any]: + async with httpx.AsyncClient() as client: + return await _poll_render(client, "gone", None) + + with pytest.raises(RuntimeError) as exc_info: + asyncio.run(run()) + assert "lost track" in str(exc_info.value) + + +def test_wait_for_aiui_returns_when_ping_ok( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + return _FakeResp({}, status=200) + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + asyncio.run(_wait_for_aiui()) # returns promptly, no raise + + +def test_wait_for_aiui_tolerates_unreachable_within_budget( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + _setup_token(monkeypatch, tmp_path) + monkeypatch.setattr(server, "COLDSTART_WAIT_S", 0.2) + + async def fake_get(self: Any, url: str, **kwargs: Any) -> Any: + raise httpx.ConnectError("companion down") + + monkeypatch.setattr(httpx.AsyncClient, "get", fake_get) + # Must NOT raise — it falls through after the budget so _preflight can + # produce the precise diagnosis. + asyncio.run(_wait_for_aiui()) From 930ddf229a382e2dacfde189f965a182ecea3861 Mon Sep 17 00:00:00 2001 From: iret77 <63622643+iret77@users.noreply.github.com> Date: Sat, 30 May 2026 17:12:08 +0200 Subject: [PATCH 05/27] =?UTF-8?q?feat:=20Step=204=20=E2=80=94=20multi-wind?= =?UTF-8?q?ow=20dialogs=20+=20session=20identifier=20(pull=20model)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops single-occupancy and gives every render its own window, labelled by the dialog id, so N dialogs (parallel sessions) coexist instead of fighting over one reused window and 409-ing each other (Invariant I8). The tunnel half of Step 4 (aiui-dedicated cleanup, Mac-side) is deferred — see the plan doc. Rather than multiply the fragile emit/ack/ready-handshake per window, this inverts to a PULL model: the window carries the id as its label and fetches its own spec on mount, so there's no event-before-listener race to guard and a large amount of single-window workaround code is removed. - dialog.rs: try_register (409) → register_dialog (N concurrent, evict-oldest only at DIALOG_HARD_CAP). Stores the request (spec + ttl + session + session_origin) for pull via get_request. cancel_all removed — per-id cancel only (a blunt drain would kill other sessions' live dialogs). - http.rs: POST /render builds a fresh per-id window (build_dialog_window); the emit / dialog_window_ready / ack-timeout / reload-retry / idle-restart machinery is gone. Teardown, RenderGuard and resolve_dialog are per-id. - lib.rs: get_dialog_spec command; per-id destroy_dialog_window; X-close cancels only the closed window's dialog; orphan-sweep + Accessory demote are multi-window aware. Removed dialog_received / dialog_window_ready commands and the ready watch channel. - Frontend DialogShell.svelte: reads its window label (= id), pulls the spec, and shows a fixed top-right session chip (session · session_origin), hidden when neither is set. No more dialog:show listener / ack round-trip. - Bridges: `session` tool param on both (mcp.rs, server.py); the Python bridge auto-injects socket.gethostname() as session_origin (I8 fallback for remotes sharing :7777). Tests: cargo 102, clippy -D warnings clean, python 26, svelte-check 0 errors. GUI behaviour is NOT verifiable from the headless remote — needs validation on the Mac (the integration harness is the right home for it). Refs #137 Co-Authored-By: Claude Opus 4.8 (1M context) --- companion/src-tauri/src/dialog.rs | 270 ++++++------------ companion/src-tauri/src/http.rs | 357 ++++-------------------- companion/src-tauri/src/lib.rs | 330 +++++++++------------- companion/src-tauri/src/mcp.rs | 12 +- companion/src/i18n/de.json | 1 + companion/src/i18n/en.json | 1 + companion/src/lib/DialogShell.svelte | 135 ++++++--- docs/architecture/stabilization-plan.md | 40 ++- python/src/aiui_mcp/server.py | 33 ++- 9 files changed, 444 insertions(+), 735 deletions(-) diff --git a/companion/src-tauri/src/dialog.rs b/companion/src-tauri/src/dialog.rs index e0684af..f3e4fd6 100644 --- a/companion/src-tauri/src/dialog.rs +++ b/companion/src-tauri/src/dialog.rs @@ -14,6 +14,18 @@ pub struct DialogRequest { /// auto-cancel slightly before the backend sweeps. Single source /// of truth for "how long the user has" is here in Rust. v0.4.41. pub ttl_secs: u64, + /// Human-legible session label the caller passed (project name, task, + /// etc.) so the user can tell which session a dialog belongs to when + /// several are open at once (Invariant I8). `None` if the caller passed + /// nothing — the window then falls back to `session_origin` + short id. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub session: Option, + /// Origin host of the caller, auto-injected by the remote Python bridge + /// (its `hostname`) since the Mac can't distinguish remotes sharing + /// `:7777`. `None`/absent for local callers. Shown in the window chrome + /// alongside `session` (I8). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_origin: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -50,10 +62,12 @@ pub const DIALOG_HARD_CAP: usize = 16; struct PendingEntry { /// Resolves the `/render` waiter once the user submits or cancels. result_tx: oneshot::Sender, - /// Resolves the per-render ack waiter the first time the frontend - /// confirms it received `dialog:show`. Wrapped in `Option` so we can - /// take it out exactly once. - ack_tx: Option>, + /// The full request payload (spec, ttl, session chrome). Stored so the + /// per-id dialog window can *pull* it on mount via `get_dialog_spec` + /// (Step 4 multi-window) instead of the backend emitting + waiting for an + /// ack — the pull model removes the event-ordering race the old + /// `dialog:show` + `dialog_window_ready` handshake existed to paper over. + request: DialogRequest, created_at: Instant, } @@ -199,16 +213,6 @@ fn collect_visible_fields(spec: &serde_json::Value) -> Vec<&serde_json::Value> { out } -/// Returned by `try_register` when a dialog is already in flight. -/// Surfaced via /render as a 409 so the calling agent can distinguish -/// "companion not reachable" from "companion is busy with someone -/// else's dialog right now". v0.4.36. -#[derive(Debug, Clone, Copy)] -pub struct BusyInfo { - pub pending_count: usize, - pub oldest_age_secs: u64, -} - impl DialogState { pub fn new() -> Self { Self { @@ -216,30 +220,21 @@ impl DialogState { } } - /// Registers a new dialog and returns `(id, result_rx, ack_rx)`. The - /// caller is responsible for surfacing the window + emitting the - /// `dialog:show` event. - /// - /// Performs an opportunistic sweep before insert: TTL-expired entries - /// are cancelled and removed, and if the hard cap would be exceeded - /// the oldest entry is evicted. No background reaper is needed. - /// - /// As of v0.4.36 the production /render path uses `try_register` - /// instead, which rejects rather than evicts when a dialog is - /// already in flight. `register` is retained as the - /// hard-cap-defense fallback for tests and any future call site - /// that legitimately wants eviction semantics. - #[allow(dead_code)] - pub fn register( + /// Register a new dialog and return `(id, result_rx)`. Multi-window + /// (Step 4, Invariant I8): N dialogs may be in flight at once — this never + /// rejects (the old single-occupancy 409 is gone). It sweeps TTL-expired + /// entries and, only if the hard cap would be exceeded, evicts the single + /// oldest. The caller (`/render`) builds a per-id window; the window pulls + /// the stored `DialogRequest` via [`Self::get_request`] on mount. + pub fn register_dialog( &self, - ) -> ( - String, - oneshot::Receiver, - oneshot::Receiver<()>, - ) { + spec: serde_json::Value, + session: Option, + session_origin: Option, + ttl_secs: u64, + ) -> (String, oneshot::Receiver) { let id = Uuid::new_v4().to_string(); let (result_tx, result_rx) = oneshot::channel(); - let (ack_tx, ack_rx) = oneshot::channel(); let mut map = self.pending.lock().unwrap(); @@ -279,27 +274,31 @@ impl DialogState { } } + let request = DialogRequest { + id: id.clone(), + spec, + ttl_secs, + session, + session_origin, + }; map.insert( id.clone(), PendingEntry { result_tx, - ack_tx: Some(ack_tx), + request, created_at: now, }, ); - (id, result_rx, ack_rx) + (id, result_rx) } - /// Marks the dialog with `id` as having been received by the frontend. - /// Idempotent: the second call is a silent no-op (oneshot already sent). - pub fn ack(&self, id: &str) { - let mut map = self.pending.lock().unwrap(); - if let Some(entry) = map.get_mut(id) { - if let Some(tx) = entry.ack_tx.take() { - let _ = tx.send(()); - } - } + /// Return a clone of the stored request for `id`, for the per-id dialog + /// window to pull on mount (Step 4 pull model). `None` if the dialog is + /// gone (already resolved, evicted, or never existed) — the window then + /// closes itself. + pub fn get_request(&self, id: &str) -> Option { + self.pending.lock().unwrap().get(id).map(|e| e.request.clone()) } pub fn complete(&self, id: &str, result: serde_json::Value) { @@ -326,94 +325,11 @@ impl DialogState { } } - /// Cancel every pending dialog with `reason`, resolving each waiting - /// `/render` as cancelled. Returns how many were cancelled. Used by - /// the dialog-window X-close handler and the orphan-window sweep - /// (v0.4.46, Bug B+) so that tearing the window down *always* - /// produces a terminal answer for any in-flight render — never a - /// silent hang behind an empty window. - pub fn cancel_all(&self, reason: &str) -> usize { - let mut map = self.pending.lock().unwrap(); - let drained: Vec<(String, PendingEntry)> = map.drain().collect(); - let n = drained.len(); - for (id, entry) in drained { - let _ = entry.result_tx.send(DialogResult { - id, - cancelled: true, - result: serde_json::Value::Null, - reason: Some(reason.to_string()), - }); - } - n - } - - /// Like `register` but rejects with `BusyInfo` if a dialog is already - /// in flight after the TTL sweep. Used by `/render` so that two - /// parallel callers (multiple aiui calls in one assistant turn, - /// two Claude sessions hitting the same companion, a stale window - /// from a previous timeout) can't silently overlay each other — - /// the second caller gets a clear conflict response instead of - /// having its predecessor's dialog evicted underfoot. v0.4.36. - /// - /// `register` is kept for tests and for any future call site that - /// genuinely wants the eviction-based behaviour, but the - /// production /render path uses `try_register` exclusively. - pub fn try_register( - &self, - ) -> Result< - ( - String, - oneshot::Receiver, - oneshot::Receiver<()>, - ), - BusyInfo, - > { - let mut map = self.pending.lock().unwrap(); - - // Sweep TTL-expired entries first — those don't count as - // "in flight" any more. Same logic as `register`. - let now = Instant::now(); - let expired: Vec = map - .iter() - .filter(|(_, e)| now.duration_since(e.created_at) > DIALOG_TTL) - .map(|(k, _)| k.clone()) - .collect(); - for stale_id in expired { - if let Some(entry) = map.remove(&stale_id) { - let _ = entry.result_tx.send(DialogResult { - id: stale_id, - cancelled: true, - result: serde_json::Value::Null, - reason: Some("ttl_expired".into()), - }); - } - } - - if !map.is_empty() { - let oldest_age_secs = map - .values() - .map(|e| now.duration_since(e.created_at).as_secs()) - .max() - .unwrap_or(0); - return Err(BusyInfo { - pending_count: map.len(), - oldest_age_secs, - }); - } - - let id = Uuid::new_v4().to_string(); - let (result_tx, result_rx) = oneshot::channel(); - let (ack_tx, ack_rx) = oneshot::channel(); - map.insert( - id.clone(), - PendingEntry { - result_tx, - ack_tx: Some(ack_tx), - created_at: now, - }, - ); - Ok((id, result_rx, ack_rx)) - } + // (Step 4 removed `cancel_all`: multi-window cancels per-id — the + // X-close handler cancels the closed window's own dialog by its label-id, + // and `sweep_orphan_dialog_window` reaps each orphan window individually. + // A blunt "cancel everything" would wrongly tear down other sessions' + // live dialogs.) /// Snapshot for `/health` / diagnostics. Cheap: one mutex acquire. pub fn stats(&self) -> DialogStats { @@ -438,10 +354,14 @@ impl DialogState { mod tests { use super::*; + fn reg(s: &DialogState) -> (String, oneshot::Receiver) { + s.register_dialog(serde_json::json!({"kind": "confirm", "title": "?"}), None, None, 0) + } + #[test] fn register_inserts_entry() { let s = DialogState::new(); - let (id, _rx, _ack) = s.register(); + let (id, _rx) = reg(&s); assert!(!id.is_empty()); assert_eq!(s.stats().orphan_count, 1); } @@ -449,7 +369,7 @@ mod tests { #[test] fn complete_resolves_and_removes() { let s = DialogState::new(); - let (id, rx, _ack) = s.register(); + let (id, rx) = reg(&s); s.complete(&id, serde_json::json!({"ok": true})); let r = rx.blocking_recv().unwrap(); assert!(!r.cancelled); @@ -459,7 +379,7 @@ mod tests { #[test] fn cancel_resolves_and_removes() { let s = DialogState::new(); - let (id, rx, _ack) = s.register(); + let (id, rx) = reg(&s); s.cancel(&id); let r = rx.blocking_recv().unwrap(); assert!(r.cancelled); @@ -467,31 +387,32 @@ mod tests { } #[test] - fn cancel_all_resolves_every_pending() { - let s = DialogState::new(); - let (_id, rx, _ack) = s.register(); - let n = s.cancel_all("window_closed"); - assert_eq!(n, 1); - let r = rx.blocking_recv().unwrap(); - assert!(r.cancelled); - assert_eq!(r.reason.as_deref(), Some("window_closed")); - assert_eq!(s.stats().orphan_count, 0); - } - - #[test] - fn cancel_all_on_empty_is_zero() { + fn multiple_dialogs_register_concurrently_no_409() { + // Step 4 / I8: single-occupancy is gone — N dialogs coexist. let s = DialogState::new(); - assert_eq!(s.cancel_all("window_closed"), 0); + let (_a, _ra) = reg(&s); + let (_b, _rb) = reg(&s); + let (_c, _rc) = reg(&s); + assert_eq!(s.stats().orphan_count, 3); } #[test] - fn ack_fires_once() { + fn get_request_returns_stored_payload_then_none_after_resolve() { let s = DialogState::new(); - let (id, _rx, ack) = s.register(); - s.ack(&id); - ack.blocking_recv().expect("first ack must arrive"); - // Second ack on the same id is a silent no-op. - s.ack(&id); + let (id, _rx) = s.register_dialog( + serde_json::json!({"kind": "confirm"}), + Some("my-project".into()), + Some("macmini".into()), + 42, + ); + let req = s.get_request(&id).expect("request stored for pull"); + assert_eq!(req.id, id); + assert_eq!(req.ttl_secs, 42); + assert_eq!(req.session.as_deref(), Some("my-project")); + assert_eq!(req.session_origin.as_deref(), Some("macmini")); + // Once resolved, the pull returns None so the window closes itself. + s.complete(&id, serde_json::json!({})); + assert!(s.get_request(&id).is_none()); } #[test] @@ -579,49 +500,24 @@ mod tests { assert_eq!(w, 880.0, "4-col wireframe inside tab should still widen"); } - #[test] - fn try_register_succeeds_when_empty() { - let s = DialogState::new(); - let res = s.try_register(); - assert!(res.is_ok()); - assert_eq!(s.stats().orphan_count, 1); - } - - #[test] - fn try_register_rejects_when_pending() { - let s = DialogState::new(); - let (_id, _rx, _ack) = s.try_register().expect("first try_register"); - let busy = s.try_register().expect_err("second try_register must be busy"); - assert_eq!(busy.pending_count, 1); - // Registry still holds the original entry. - assert_eq!(s.stats().orphan_count, 1); - } - - #[test] - fn try_register_succeeds_after_complete() { - let s = DialogState::new(); - let (id, _rx, _ack) = s.try_register().expect("first"); - s.complete(&id, serde_json::json!({"ok": true})); - let res = s.try_register(); - assert!(res.is_ok()); - } - #[test] fn hard_cap_evicts_oldest() { let s = DialogState::new(); let mut rxs = Vec::new(); for _ in 0..DIALOG_HARD_CAP { - let (_id, rx, _ack) = s.register(); + let (_id, rx) = reg(&s); rxs.push(rx); } assert_eq!(s.stats().orphan_count, DIALOG_HARD_CAP); - // One more — should evict the oldest. - let (_id, _rx, _ack) = s.register(); + // One more — should evict the oldest (the cap bounds the map; the + // 409 single-occupancy that used to reject earlier is gone). + let (_id, _rx) = reg(&s); assert_eq!(s.stats().orphan_count, DIALOG_HARD_CAP); // The first registered receiver should now resolve as cancelled. let first = rxs.remove(0).blocking_recv().unwrap(); assert!(first.cancelled); + assert_eq!(first.reason.as_deref(), Some("evicted")); } } diff --git a/companion/src-tauri/src/http.rs b/companion/src-tauri/src/http.rs index fec3253..7a26583 100644 --- a/companion/src-tauri/src/http.rs +++ b/companion/src-tauri/src/http.rs @@ -1,6 +1,6 @@ use crate::ack::AckRegistry; use crate::config::AppConfig; -use crate::dialog::{DialogRequest, DialogState, DIALOG_TTL}; +use crate::dialog::{DialogState, DIALOG_TTL}; use crate::lifetime::LifetimeStats; use axum::{ extract::{Path, State}, @@ -17,29 +17,10 @@ use crate::logging::trace; use tauri::{AppHandle, Emitter, Manager}; use tauri_plugin_updater::UpdaterExt; -/// How long the `/render` handler waits for the frontend to acknowledge -/// receipt of `dialog:show` before concluding the WebView event loop is -/// dead and triggering a reload. -const DIALOG_ACK_TIMEOUT: Duration = Duration::from_millis(500); - -/// Pause after `webview.reload()` before re-emitting `dialog:show`. Gives -/// the freshly-loaded Svelte app time to mount and register its listener. -const RELOAD_SETTLE: Duration = Duration::from_millis(300); - /// How long `/health` waits for a `ui:ping` round-trip from the frontend /// before concluding the WebView is unresponsive. const UI_PING_TIMEOUT: Duration = Duration::from_millis(100); -/// Idle-restart trigger: if the GUI has been alive longer than this AND -/// hasn't served a render recently (see `IDLE_RESTART_QUIET`), the next -/// render reloads the WebView before showing — flushes any drift that -/// accumulated while nobody was watching. -const IDLE_RESTART_UPTIME: Duration = Duration::from_secs(24 * 60 * 60); - -/// Minimum time between renders for the long-uptime reload to trigger. -/// Prevents reloading mid-burst when many renders fire close together. -const IDLE_RESTART_QUIET: Duration = Duration::from_secs(10 * 60); - /// Header a bridge sets to opt into async `/render` (Step 3). Present → /// `POST /render` registers + surfaces the dialog, returns `{id, ttl}` /// immediately (202), and the caller polls `GET /render/{id}`. Absent → the @@ -76,13 +57,6 @@ struct AppState { ui_acks: Arc, lifetime: Arc, app: AppHandle, - /// Process-start timestamp for the GUI. Used to evaluate the - /// idle-restart condition without requiring an OS sleep/wake hook. - started_at: Instant, - /// Last time `/render` produced (or attempted to produce) a dialog. - /// Mutex is fine here — contention is bounded by the rate of - /// /render calls. - last_render_at: Arc>, /// Buffered terminal results for async renders (Step 3), keyed by dialog /// id. Empty in the all-synchronous case. async_slots: Arc>>, @@ -93,6 +67,15 @@ struct RenderRequest { #[serde(default)] _timeout_s: Option, spec: serde_json::Value, + /// Human-legible session label set by the caller (Step 4, I8). Shown in + /// the dialog window's chrome so the user can tell which session a dialog + /// belongs to. Optional. + #[serde(default)] + session: Option, + /// Origin host, auto-injected by the remote Python bridge (its hostname). + /// Optional; absent for local callers. + #[serde(default)] + session_origin: Option, } #[derive(Serialize)] @@ -182,15 +165,12 @@ pub async fn serve( app: AppHandle, ) -> std::io::Result<()> { let port = cfg.http_port; - let now = Instant::now(); let state = AppState { cfg, dialog, ui_acks, lifetime, app, - started_at: now, - last_render_at: Arc::new(Mutex::new(now)), async_slots: Arc::new(Mutex::new(std::collections::HashMap::new())), }; @@ -616,11 +596,12 @@ impl Drop for RenderGuard { )); // Free the registry slot so the next /render isn't 409'd for 2 h. self.dialog.cancel(&self.id); - // Tear down the surfaced window so it can't strand empty. + // Tear down the surfaced window (labelled by id) so it can't strand. if let Some(app) = &self.app { let app_for_destroy = app.clone(); + let id_for_destroy = self.id.clone(); let _ = app.run_on_main_thread(move || { - crate::destroy_dialog_window(&app_for_destroy) + crate::destroy_dialog_window(&app_for_destroy, &id_for_destroy) }); } } @@ -668,9 +649,10 @@ async fn resolve_dialog( // guarantees a dialog window never outlives its dialog. Idempotent — // a no-op on the submit/cancel paths where the window is already gone. let app_for_destroy = state.app.clone(); + let id_for_destroy = id.clone(); let _ = state .app - .run_on_main_thread(move || crate::destroy_dialog_window(&app_for_destroy)); + .run_on_main_thread(move || crate::destroy_dialog_window(&app_for_destroy, &id_for_destroy)); result } @@ -813,193 +795,46 @@ async fn render( .into_response(); } - // v0.4.36: try_register rejects when a dialog is already in flight - // instead of evicting the existing one. Two parallel callers — multi- - // call-per-turn, two Claude sessions, or a stale window from a prior - // timeout — would otherwise overlay each other in the single dialog - // window, with the older request's `oneshot` resolving as `evicted` - // exactly while the user was still looking at it. The 409 response - // gives the second caller a structured "busy" answer so the agent - // can choose to retry or tell the user the dialog is held by - // something else. Setup-window-driven UI calls don't go through - // /render at all, so this only governs agent dialog traffic. - let (id, result_rx, ack_rx) = match state.dialog.try_register() { - Ok(triple) => triple, - Err(busy) => { - trace(&format!( - "render: rejected — companion busy (pending={}, oldest_age={}s)", - busy.pending_count, busy.oldest_age_secs - )); - return ( - StatusCode::CONFLICT, - Json(serde_json::json!({ - "error": "busy", - "pending_count": busy.pending_count, - "oldest_age_secs": busy.oldest_age_secs, - })), - ) - .into_response(); - } - }; + // Multi-window (Step 4, I8): N dialogs may be in flight at once — the + // single-occupancy 409 is gone. `register_dialog` stores the request (the + // per-id window pulls it via `get_dialog_spec`) and only evicts the oldest + // if the hard cap is hit. Setup-window UI calls don't go through /render, + // so this governs agent dialog traffic only. Size is estimated from the + // spec before it moves into the registry. + let size = crate::dialog::estimate_dialog_size(&req.spec); + let (id, result_rx) = state.dialog.register_dialog( + req.spec, + req.session, + req.session_origin, + DIALOG_TTL.as_secs(), + ); trace(&format!("render: registered id={}", id)); - // Cancellation-safety net: from here until the explicit terminal teardown - // below, a dropped handler future (client give-up) must not leak the - // registry entry or strand the window. See `RenderGuard`. + // Cancellation-safety net: until the terminal teardown (sync) or the + // hand-off to the detached task (async), a dropped handler future must not + // leak the registry entry or strand the window. See `RenderGuard`. let mut guard = RenderGuard { id: id.clone(), dialog: state.dialog.clone(), app: Some(state.app.clone()), armed: true, }; - let dr = DialogRequest { - id: id.clone(), - spec: req.spec, - // Sent so the frontend can schedule warning banners + auto-cancel - // a fraction before the backend sweep fires. Single source of - // truth lives in `DIALOG_TTL`. v0.4.41. - ttl_secs: DIALOG_TTL.as_secs(), - }; - - // ── Idle-restart check (#41) ──────────────────────────────────────── - // If the GUI has been up for a long time and the last render was a - // while ago, reload the WebView before serving this one. Catches - // accumulated drift (sleep/wake artefacts, stuck event listeners) - // *exactly* when it would matter — not on a wall-clock timer. - // - // Important: never reload while a previous dialog is still pending. - // The reload tears down the WebView's JS state including any active - // dialog the user might be looking at, and the still-awaiting - // `/render` handler would get a `channel_dropped` cancellation - // instead of the user's actual answer. Only reload when the registry - // is empty. Issue #H-6 in v0.4.10 review. - { - let last = *state.last_render_at.lock().unwrap(); - let pending = state.dialog.stats().orphan_count; - if state.started_at.elapsed() > IDLE_RESTART_UPTIME - && last.elapsed() > IDLE_RESTART_QUIET - && pending == 0 - { - trace(&format!( - "render: idle-restart trigger (uptime {:?}, last_render {:?} ago, registry empty)", - state.started_at.elapsed(), - last.elapsed() - )); - reload_main_webview(&state.app); - tokio::time::sleep(RELOAD_SETTLE).await; - } else if state.started_at.elapsed() > IDLE_RESTART_UPTIME - && last.elapsed() > IDLE_RESTART_QUIET - { - trace(&format!( - "render: idle-restart suppressed — {} pending dialog(s) in registry", - pending - )); - } - } - // Mark this render attempt — done early so the ack/recreate path - // still resets the idle clock even if the user closes the dialog. - *state.last_render_at.lock().unwrap() = Instant::now(); - - // Surface the window from the main thread. If the window is being - // built fresh (first render of this session, or after the user - // closed it), `ensure_dialog_window` reset the ready flag. - // Window-size estimate is per-spec — wide widgets widen, long - // forms grow vertically. v0.4.40. - let size = crate::dialog::estimate_dialog_size(&dr.spec); - surface_main_window(&state.app, &id, size); - - // Window-ready handshake: wait until the frontend signals that - // its `dialog:show` listener is registered. Without this gate - // we'd race against Vite-bundle-load + Svelte-mount + tauri-listen, - // and on the first render of a session the emit would land before - // the listener — silent loss, 500 ms ack timeout, webview reload - // and a confused user staring at a blank window. - wait_for_dialog_ready(&state.app, "pre-emit").await; - - // Emit the dialog to the frontend. - if let Err(e) = state - .app - .emit_to(crate::DIALOG_WINDOW_LABEL, "dialog:show", &dr) + // Build a fresh window labelled by the dialog id (Step 4 pull model). The + // window reads its own label and fetches the spec via `get_dialog_spec` on + // mount — there is no `dialog:show` emit, no ready-handshake, no ack + // timeout, and no reload-retry, because the frontend initiates and so + // can't race an event it isn't listening for yet. Window ops are + // main-thread-only. { - trace(&format!("render: emit FAILED: {e}")); - } else { - trace(&format!("render: emitted dialog:show id={}", id)); - } - - // ── Ack-Contract ──────────────────────────────────────────────────── - // Wait briefly for the frontend to confirm receipt. If no ack arrives, - // the WebView event loop is most likely dead — try to revive it by - // reloading the webview, then re-emitting once. If the second ack also - // fails, give up and surface a structured error to the caller instead - // of blocking indefinitely on a dialog the user will never see. - match tokio::time::timeout(DIALOG_ACK_TIMEOUT, ack_rx).await { - Ok(Ok(())) => { - trace(&format!("render: ack ok id={}", id)); - } - _ => { - trace(&format!( - "render: no ack within {:?}; reloading webview and retrying", - DIALOG_ACK_TIMEOUT - )); - // Reset ready flag — after reload the listeners need to - // re-register. We'll wait on the handshake again before - // re-emitting. - if let Some(tx) = state - .app - .try_state::>>() - { - let _ = tx.inner().send(false); + let app_for_build = state.app.clone(); + let id_for_build = id.clone(); + let _ = state.app.run_on_main_thread(move || { + if let Err(e) = crate::build_dialog_window(&app_for_build, &id_for_build, size) { + trace(&format!( + "render: build_dialog_window failed id={id_for_build}: {e}" + )); } - reload_main_webview(&state.app); - tokio::time::sleep(RELOAD_SETTLE).await; - - // Wait for the freshly-mounted Svelte to signal listeners - // are wired up again. Without this the same race that got - // us here would just repeat after reload. - wait_for_dialog_ready(&state.app, "post-reload").await; - - // After reload the previous ack receiver was consumed. We need a - // fresh handshake on the same dialog id — register a new ack - // slot tied to the same id is overkill; instead we just re-emit - // and wait on the same (already-armed) ack registry by treating - // the second emit's resolution as the ack we care about. - // - // Since `register()` only created one ack channel and we just - // consumed its receiver via the timeout, we have to fall back - // to a small generic ack via the AckRegistry for the second - // round. That keeps DialogState simple. - let (probe_id, probe_rx) = state.ui_acks.register(); - if let Err(e) = state - .app - .emit_to(crate::DIALOG_WINDOW_LABEL, "ui:ping", &probe_id) - { - trace(&format!("render: post-reload ui:ping emit failed: {e}")); - state.ui_acks.forget(&probe_id); - } - match tokio::time::timeout(DIALOG_ACK_TIMEOUT, probe_rx).await { - Ok(Ok(())) => { - trace("render: post-reload webview is responsive, re-emitting dialog:show"); - if let Err(e) = - state.app.emit_to(crate::DIALOG_WINDOW_LABEL, "dialog:show", &dr) - { - trace(&format!("render: re-emit FAILED: {e}")); - } - } - _ => { - state.ui_acks.forget(&probe_id); - trace("render: webview still unreachable after reload — giving up"); - state.dialog.cancel(&id); - return ( - StatusCode::SERVICE_UNAVAILABLE, - Json(serde_json::json!({ - "error": "ui_unreachable", - "detail": "webview did not acknowledge dialog:show after reload", - })), - ) - .into_response(); - } - } - } + }); } // ── Async branch (Step 3) ─────────────────────────────────────────── @@ -1059,97 +894,6 @@ async fn render( .into_response() } -/// Wait until the dialog window's frontend signals via the -/// `dialog_window_ready` Tauri command that its `dialog:show` and -/// `ui:ping` listeners are registered. Times out after -/// `DIALOG_READY_TIMEOUT` and returns either way — the caller still -/// emits, falling back to the existing ack/reload contract if the -/// frontend turns out to be slower than expected. -/// -/// Called twice in the render path: once before the initial emit -/// (covers the cold-start race when the window is built fresh), once -/// after a webview reload (covers the same race after the recovery -/// path tears down the JS state). -const DIALOG_READY_TIMEOUT: Duration = Duration::from_millis(3000); - -async fn wait_for_dialog_ready(app: &AppHandle, phase: &str) { - let Some(tx_state) = app.try_state::>>() - else { - trace(&format!("render: dialog_ready_tx state missing ({phase})")); - return; - }; - let mut rx = tx_state.inner().subscribe(); - if *rx.borrow() { - trace(&format!("render: dialog already ready ({phase})")); - return; - } - let started = std::time::Instant::now(); - let waited = tokio::time::timeout(DIALOG_READY_TIMEOUT, async { - while !*rx.borrow_and_update() { - if rx.changed().await.is_err() { - break; - } - } - }) - .await; - if waited.is_ok() && *rx.borrow() { - trace(&format!( - "render: dialog ready ({phase}) after {:?}", - started.elapsed() - )); - } else { - trace(&format!( - "render: dialog-ready timeout ({phase}) after {:?} — proceeding anyway", - started.elapsed() - )); - } -} - -/// Surface the dialog window for the incoming render. If the window -/// already exists, show + focus + unminimize + resize to fit this -/// spec; otherwise build it at the spec-derived inner size. -/// All Tauri window operations have to run on the main thread, so we -/// hop there via `run_on_main_thread`. -fn surface_main_window(app: &AppHandle, id: &str, size: (f64, f64)) { - let app_for_show = app.clone(); - let id_for_log = id.to_string(); - let rc = app.clone().run_on_main_thread(move || { - trace(&format!( - "render: main-thread callback id={} size=({:.0},{:.0})", - id_for_log, size.0, size.1 - )); - match crate::ensure_dialog_window(&app_for_show, size) { - Ok(_win) => { - trace("render: main-thread dialog window ready (show/build)"); - } - Err(e) => { - trace(&format!("render: main-thread dialog window FAILED: {e}")); - } - } - }); - trace(&format!("render: run_on_main_thread returned {:?}", rc.is_ok())); -} - -/// Reload the main webview to recover from a stuck JS event loop. Tears -/// down the JS side (DOM, listeners, setIntervals) and re-runs the Svelte -/// app from scratch — Tauri's `webview.reload()` is exactly this. We use -/// it as the recreate path because it's lighter than destroying and -/// rebuilding the window via `WebviewWindowBuilder` and recovers from the -/// same class of failure. -fn reload_main_webview(app: &AppHandle) { - let app_for_reload = app.clone(); - let _ = app.clone().run_on_main_thread(move || { - if let Some(win) = app_for_reload.get_webview_window(crate::DIALOG_WINDOW_LABEL) { - trace("render: reloading dialog webview"); - if let Err(e) = win.eval("location.reload()") { - trace(&format!("render: reload eval failed: {e}")); - } - } else { - trace("render: reload requested but main window is MISSING"); - } - }); -} - #[cfg(test)] mod validate_tests { use super::validate_spec; @@ -1219,10 +963,14 @@ mod render_guard_tests { // window-destroy half needs a Tauri app, so these cover the registry half // (`app: None`) — the half that produces the 409. + fn reg(ds: &DialogState) -> (String, tokio::sync::oneshot::Receiver) { + ds.register_dialog(serde_json::json!({"kind": "confirm"}), None, None, 0) + } + #[test] fn armed_guard_drop_frees_registry_slot() { let ds = Arc::new(DialogState::new()); - let (id, result_rx, _ack) = ds.try_register().expect("first register is free"); + let (id, result_rx) = reg(&ds); assert_eq!(ds.stats().orphan_count, 1); { let _guard = RenderGuard { @@ -1233,9 +981,8 @@ mod render_guard_tests { }; // future "dropped" here } - // Slot freed → the next render would NOT get a 409. + // Slot freed → a later render isn't blocked behind a leaked entry. assert_eq!(ds.stats().orphan_count, 0); - assert!(ds.try_register().is_ok(), "registry is free again after guard cleanup"); // The awaiter observes a cancelled terminal result, not a hang. let r = result_rx.blocking_recv().expect("result_tx sent on cancel"); assert!(r.cancelled); @@ -1246,7 +993,7 @@ mod render_guard_tests { // The normal terminal path disarms after its own teardown; the guard // must then do nothing (no double-cancel, no spurious slot churn). let ds = Arc::new(DialogState::new()); - let (id, _result_rx, _ack) = ds.try_register().unwrap(); + let (id, _result_rx) = reg(&ds); { let mut guard = RenderGuard { id: id.clone(), diff --git a/companion/src-tauri/src/lib.rs b/companion/src-tauri/src/lib.rs index a74c5bb..0af3e7a 100644 --- a/companion/src-tauri/src/lib.rs +++ b/companion/src-tauri/src/lib.rs @@ -45,16 +45,18 @@ fn dialog_cancel( Ok(()) } -/// Frontend confirms it received the matching `dialog:show` event. The -/// `/render` handler waits up to 500 ms for this before assuming the WebView -/// event loop is dead and triggering a recreate. +/// Multi-window pull model (Step 4): the per-id dialog window fetches its own +/// render payload on mount, keyed by its window label (= the dialog id). This +/// replaces the old `dialog:show` emit + `dialog_window_ready` ack handshake — +/// the frontend initiates, so there is no event-before-listener race to guard. +/// Returns `None` if the dialog is already gone (resolved/evicted), and the +/// window closes itself. #[tauri::command] -fn dialog_received( +fn get_dialog_spec( state: tauri::State<'_, Arc>, id: String, -) -> Result<(), String> { - state.ack(&id); - Ok(()) +) -> Result, String> { + Ok(state.get_request(&id)) } /// Frontend response to a `ui:ping` event from `/health`. Same shape as @@ -68,19 +70,30 @@ fn ui_pong( Ok(()) } -/// Frontend signals that the dialog window is mounted and its -/// `dialog:show` / `ui:ping` listeners are registered. The render -/// path on the Rust side waits on this watch *before* emitting, so -/// a freshly-built dialog window never receives a `dialog:show` -/// event before the listener is up. Without this handshake we hit -/// the 500 ms ack timeout, reload the WebView, and lose the user's -/// dialog (the failure mode reported on 2026-05-03). -#[tauri::command] -fn dialog_window_ready( - tx: tauri::State<'_, Arc>>, -) -> Result<(), String> { - let _ = tx.send(true); - Ok(()) +/// A dialog window's label IS its dialog id (Step 4 multi-window): any window +/// that isn't the setup window is a dialog window. There is no longer a single +/// reused `DIALOG_WINDOW_LABEL` window. +fn is_dialog_window_label(label: &str) -> bool { + label != SETUP_WINDOW_LABEL +} + +/// macOS: drop back to Accessory (no Dock icon) once no dialog window remains +/// open *other than* `except` (the one currently being torn down — `destroy()` +/// may not have removed it from the window list yet) and the setup window is +/// hidden. Matches the Regular-mode promote in `build_dialog_window`. +#[cfg(target_os = "macos")] +fn demote_if_no_dialogs_except(app: &tauri::AppHandle, except: &str) { + let setup_open = app + .get_webview_window(SETUP_WINDOW_LABEL) + .and_then(|w| w.is_visible().ok()) + .unwrap_or(false); + let other_dialog_open = app + .webview_windows() + .keys() + .any(|l| is_dialog_window_label(l) && l != except); + if !other_dialog_open && !setup_open { + let _ = app.set_activation_policy(tauri::ActivationPolicy::Accessory); + } } #[tauri::command] @@ -99,72 +112,61 @@ async fn close_window(window: tauri::WebviewWindow) -> Result<(), String> { let _ = window.close(); log::debug!("[aiui] close_window: closed {label}"); - // If that was the dialog window and no setup window is open, - // demote the app back to Accessory mode so we don't permanently - // grow a Dock icon. `ensure_dialog_window` promotes us to Regular + // If a dialog window just closed and nothing else needs the Dock icon, + // demote back to Accessory. `build_dialog_window` promoted us to Regular // for the dialog's lifetime; this is the matching demote. #[cfg(target_os = "macos")] - if label == DIALOG_WINDOW_LABEL { - let setup_open = app - .get_webview_window(SETUP_WINDOW_LABEL) - .and_then(|w| w.is_visible().ok()) - .unwrap_or(false); - if !setup_open { - let _ = app.set_activation_policy(tauri::ActivationPolicy::Accessory); - } + if is_dialog_window_label(&label) { + demote_if_no_dialogs_except(&app, &label); } Ok(()) } -/// Authoritatively tear down the dialog window from the Rust side -/// (v0.4.46, Bug B). Uses `destroy()` (immediate) rather than `close()` -/// so it bypasses the `CloseRequested` → frontend round-trip that could -/// strand an empty window when the WebView's handler failed to complete -/// the close. This is the single teardown point for the dialog window: -/// the `/render` handler calls it once a render reaches *any* terminal -/// outcome (submit, cancel, X-close, TTL, channel-drop), so the window -/// can never outlive the dialog it was showing. Idempotent — a no-op -/// when the window is already gone. -pub(crate) fn destroy_dialog_window(app: &tauri::AppHandle) { - if let Some(win) = app.get_webview_window(DIALOG_WINDOW_LABEL) { +/// Authoritatively tear down the dialog window with label `id` from the Rust +/// side (v0.4.46, Bug B; Step 4: now per-id). Uses `destroy()` (immediate) +/// rather than `close()` so it bypasses the `CloseRequested` → frontend +/// round-trip that could strand an empty window. The `/render` handler calls +/// it once that render reaches *any* terminal outcome (submit, cancel, +/// X-close, TTL, channel-drop), so a window can never outlive the dialog it +/// was showing. Idempotent — a no-op when the window is already gone. +pub(crate) fn destroy_dialog_window(app: &tauri::AppHandle, id: &str) { + if let Some(win) = app.get_webview_window(id) { let _ = win.destroy(); } - // Matching demote for `ensure_dialog_window`'s Regular-mode promote: - // drop back to Accessory once the dialog is gone, unless the setup - // window is still up. + // Matching demote for `build_dialog_window`'s Regular-mode promote: drop + // back to Accessory once the last dialog is gone (ignoring the one we just + // destroyed, which may still be in the window list) and setup is hidden. #[cfg(target_os = "macos")] - { - let setup_open = app - .get_webview_window(SETUP_WINDOW_LABEL) - .and_then(|w| w.is_visible().ok()) - .unwrap_or(false); - if !setup_open { - let _ = app.set_activation_policy(tauri::ActivationPolicy::Accessory); - } - } + demote_if_no_dialogs_except(app, id); } -/// Belt-and-suspenders invariant (v0.4.46, Bug B+): a dialog window may -/// only exist while a dialog is pending in the registry. If a window is -/// found with an empty registry, it's a stranded empty window — destroy -/// it. Cheap (one mutex read + a window lookup); called on app -/// re-activation, exactly when a user would otherwise notice a leftover -/// empty frame. +/// Belt-and-suspenders invariant (v0.4.46, Bug B+; Step 4: per-id): a dialog +/// window may only exist while its dialog is pending in the registry. Any +/// dialog-labelled window whose id is no longer registered is a stranded +/// empty window — destroy it. Called on app re-activation, exactly when a +/// user would otherwise notice a leftover empty frame. /// -/// Currently wired only to macOS `RunEvent::Reopen`; other platforms -/// have no trigger yet (Windows surfaces the existing window via the -/// single-instance plugin), so allow it to be unused there instead of -/// `#[cfg]`-gating the whole fn — keeps it ready for a future Windows -/// hook without tripping CI's `-D warnings` dead-code check. +/// Currently wired only to macOS `RunEvent::Reopen`; other platforms have no +/// trigger yet (Windows surfaces the existing window via the single-instance +/// plugin), so allow it to be unused there instead of `#[cfg]`-gating the +/// whole fn — keeps it ready for a future Windows hook without tripping CI's +/// `-D warnings` dead-code check. #[cfg_attr(not(target_os = "macos"), allow(dead_code))] pub(crate) fn sweep_orphan_dialog_window(app: &tauri::AppHandle) { - let pending = app - .try_state::>() - .map(|s| s.stats().orphan_count) - .unwrap_or(0); - if pending == 0 && app.get_webview_window(DIALOG_WINDOW_LABEL).is_some() { - log::debug!("[aiui] sweep: destroying orphan dialog window (no pending dialog)"); - destroy_dialog_window(app); + let Some(state) = app.try_state::>() else { + return; + }; + // Collect dialog-window labels (ids) whose dialog is no longer registered. + let orphans: Vec = app + .webview_windows() + .keys() + .filter(|l| is_dialog_window_label(l)) + .filter(|l| state.get_request(l).is_none()) + .cloned() + .collect(); + for id in orphans { + log::debug!("[aiui] sweep: destroying orphan dialog window id={id} (no pending dialog)"); + destroy_dialog_window(app, &id); } } @@ -201,10 +203,14 @@ async fn surface_for_dialog(app: tauri::AppHandle) -> Result<(), String> { // The update dialog is surfaced from whichever window is alive when // the check fires — usually the setup window (frontend triggers it // from there). We just need *some* visible window to attach the OS - // dialog to. - let win = app - .get_webview_window(SETUP_WINDOW_LABEL) - .or_else(|| app.get_webview_window(DIALOG_WINDOW_LABEL)); + // dialog to; fall back to any open dialog window (Step 4: dialog + // windows are labelled by id, so there's no single fixed label). + let win = app.get_webview_window(SETUP_WINDOW_LABEL).or_else(|| { + app.webview_windows() + .into_iter() + .find(|(label, _)| is_dialog_window_label(label)) + .map(|(_, w)| w) + }); if let Some(win) = win { let _ = win.show(); let _ = win.set_focus(); @@ -933,96 +939,53 @@ pub(crate) fn build_setup_window( /// `dialog::estimate_dialog_size`. The window is resizable, so the user /// can drag past these defaults — we just pick a sensible starting /// geometry given what the agent asked us to render. -pub(crate) fn ensure_dialog_window( +/// Build a fresh dialog window labelled by the dialog `id` (Step 4 +/// multi-window: one window per render, never reused — N may be open at once). +/// It loads `dialog.html`, which reads its own window label (= id) and *pulls* +/// the render payload via `get_dialog_spec` on mount. Promotes the app to +/// Regular so the window fronts above Claude Desktop (in Accessory mode macOS +/// won't bring our windows forward even with `set_focus()`); +/// `close_window` / `destroy_dialog_window` demote back to Accessory once the +/// last dialog is gone. +pub(crate) fn build_dialog_window( app: &tauri::AppHandle, + id: &str, size: (f64, f64), ) -> tauri::Result { - // Promote the app from Accessory to Regular for the duration of the - // dialog. In Accessory mode (LSUIElement-style daemon, no Dock icon) - // macOS won't bring our windows to the front above other apps even - // with `set_focus()` — the agent renders a dialog and the user - // doesn't see it because Claude Desktop covers it. Promoting to - // Regular for the dialog window restores normal front/focus - // behaviour; we drop back to Accessory in `close_window` once the - // dialog finishes so we don't permanently grow a Dock icon. #[cfg(target_os = "macos")] { let _ = app.set_activation_policy(tauri::ActivationPolicy::Regular); } - if let Some(win) = app.get_webview_window(DIALOG_WINDOW_LABEL) { - // Resize to fit the new spec before surfacing. Without this, - // a confirm rendered after a long form would keep the form's - // tall geometry (and vice versa). - let _ = win.set_size(tauri::LogicalSize::new(size.0, size.1)); - let _ = win.show(); - let _ = win.set_focus(); - let _ = win.unminimize(); - // Briefly mark the window always-on-top to win against any - // app that's grabbed focus in the meantime, then lift the - // flag so the user can naturally Cmd+Tab away later. 800 ms - // is enough for the activation to settle without leaving a - // sticky front-most window. - let _ = win.set_always_on_top(true); - let app_for_lift = app.clone(); - std::thread::spawn(move || { - std::thread::sleep(std::time::Duration::from_millis(800)); - if let Some(w) = app_for_lift.get_webview_window(DIALOG_WINDOW_LABEL) { - let _ = w.set_always_on_top(false); - } - }); - return Ok(win); - } - // Window is being built fresh — its frontend listeners aren't up - // yet. Reset the ready flag so the render path waits for the - // `dialog_window_ready` signal before emitting `dialog:show`. - if let Some(tx) = app.try_state::>>() { - let _ = tx.inner().send(false); - } - WebviewWindowBuilder::new( - app, - DIALOG_WINDOW_LABEL, - WebviewUrl::App("dialog.html".into()), - ) - .title("aiui") - // Initial size from `estimate_dialog_size` — we widen for - // wireframe/mermaid/table and grow vertically for long forms, - // clamped to (1100, 900). Resizable so the user always has the - // last word; min size keeps the dialog usable but prevents - // accidental sub-icon collapse. v0.4.40. - .inner_size(size.0, size.1) - .min_inner_size(360.0, 320.0) - .resizable(true) - .center() - // Native, fully-visible title bar so macOS handles window-drag - // for us. Tauri's `data-tauri-drag-region` HTML attribute and - // Chromium's `-webkit-app-region: drag` CSS are *both* unreliable - // on Tauri 2 + WKWebView (macOS 26): the first sometimes drops - // mousedown depending on z-order, the second is a Chromium-only - // CSS property that WKWebView doesn't honour at all. The only - // robust path is to let macOS run its own title-bar drag, which - // means a visible title bar (the previous "Overlay + hiddenTitle" - // setup hid the title-bar pixels but kept its drag behaviour - // half-broken). We accept the slightly-less-flush look in - // exchange for a window the user can actually move. - .decorations(true) - .disable_drag_drop_handler() - .visible(true) - .always_on_top(true) - .build() - .inspect(|_win| { - // Fresh dialog windows also get the same lift-after-800 ms - // treatment as the reused-window branch above. The - // always_on_top flag from the builder ensures the window - // appears above everything; we drop it shortly after so - // Cmd+Tab works normally afterwards. - let app_for_lift = app.clone(); - std::thread::spawn(move || { - std::thread::sleep(std::time::Duration::from_millis(800)); - if let Some(w) = app_for_lift.get_webview_window(DIALOG_WINDOW_LABEL) { - let _ = w.set_always_on_top(false); - } - }); - }) + let id_for_lift = id.to_string(); + WebviewWindowBuilder::new(app, id, WebviewUrl::App("dialog.html".into())) + .title("aiui") + // Initial size from `estimate_dialog_size` — we widen for + // wireframe/mermaid/table and grow vertically for long forms, + // clamped to (1100, 900). Resizable so the user always has the last + // word; min size keeps the dialog usable. v0.4.40. + .inner_size(size.0, size.1) + .min_inner_size(360.0, 320.0) + .resizable(true) + .center() + // Native, fully-visible title bar so macOS handles window-drag for us + // (Tauri's HTML drag-region and the `-webkit-app-region` CSS are both + // unreliable on Tauri 2 + WKWebView). v0.4.40. + .decorations(true) + .disable_drag_drop_handler() + .visible(true) + .always_on_top(true) + .build() + .inspect(|_win| { + // Briefly always-on-top to win the focus race, then lift it so + // Cmd+Tab works normally afterwards. + let app_for_lift = app.clone(); + std::thread::spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(800)); + if let Some(w) = app_for_lift.get_webview_window(&id_for_lift) { + let _ = w.set_always_on_top(false); + } + }); + }) } /// True when no aiui window is currently visible to the user. Used by @@ -1212,17 +1175,9 @@ pub fn run() { // it apart from `http_error` (same underlying type). let pending_update = Arc::new(PendingUpdate::default()); - // Window-ready handshake: the dialog window's frontend signals - // here (via the `dialog_window_ready` Tauri command) once its - // listeners are wired up. The render path *waits* on this watch - // before emitting `dialog:show`, so a freshly-built dialog window - // never receives an event before its listener is registered. The - // 0.4.30 fix — without it, a 500 ms ack timeout could fire before - // the WebView even finished mounting Svelte (especially on the - // very first render of a session, when the window is built fresh - // and Vite has to load the bundle). - let (dialog_ready_tx, _dialog_ready_rx) = tokio::sync::watch::channel(false); - let dialog_ready_tx = Arc::new(dialog_ready_tx); + // (Step 4: the old `dialog_window_ready` watch handshake is gone — the + // per-id dialog window pulls its spec via `get_dialog_spec` on mount, so + // there's no emit to race and nothing to wait on.) let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() @@ -1278,13 +1233,11 @@ pub fn run() { .manage(tunnel_mgr.clone()) .manage(http_error.clone()) .manage(pending_update.clone()) - .manage(dialog_ready_tx.clone()) .invoke_handler(tauri::generate_handler![ dialog_submit, dialog_cancel, - dialog_received, + get_dialog_spec, ui_pong, - dialog_window_ready, close_window, surface_for_dialog, is_update_safe_to_install, @@ -1606,27 +1559,22 @@ pub fn run() { if let tauri::WindowEvent::CloseRequested { api, .. } = event { let app = window.app_handle(); let closed_label = window.label().to_string(); - if closed_label == DIALOG_WINDOW_LABEL { - // User closed the dialog window with the native X (or - // ⌘W). Resolve any in-flight `/render` as cancelled - // right here in Rust — we no longer depend on a - // frontend CloseRequested handler, which in 0.4.45 - // could `preventDefault()` and then fail to complete - // the close, stranding an empty, unclosable window - // (Bug B, the 2026-05-29 overnight report). We do NOT - // prevent the close: the window is allowed to go away. - // The awaiting `/render` will run its end-of-handler - // `destroy_dialog_window` (a no-op by then). + if is_dialog_window_label(&closed_label) { + // User closed THIS dialog window with the native X (or + // ⌘W). Multi-window (Step 4): the window label is the + // dialog id, so cancel exactly that dialog — never the + // others that may be open for parallel sessions. Resolve + // it as cancelled right here in Rust (we no longer depend + // on a frontend CloseRequested handler, which in 0.4.45 + // could `preventDefault()` then fail to complete the + // close, stranding an empty window — Bug B). We do NOT + // prevent the close; the awaiting `/render` runs its + // end-of-handler `destroy_dialog_window` (a no-op by then). if let Some(ds) = app.try_state::>() { - let n = ds.cancel_all("window_closed"); - if n > 0 { - log::debug!( - "[aiui] dialog window X-closed — cancelled {n} pending dialog(s)" - ); - } + ds.cancel(&closed_label); } log::debug!( - "[aiui] dialog window closed — staying alive for further tool calls" + "[aiui] dialog window {closed_label} X-closed — cancelled its dialog, host stays alive" ); return; } diff --git a/companion/src-tauri/src/mcp.rs b/companion/src-tauri/src/mcp.rs index 0d52451..c998587 100644 --- a/companion/src-tauri/src/mcp.rs +++ b/companion/src-tauri/src/mcp.rs @@ -275,6 +275,7 @@ fn tools_list() -> Value { "required": ["title"], "properties": { "title": { "type": "string", "description": "Decision as a question, ≤ 10 words." }, + "session": { "type": "string", "description": "Optional short human label for the session this dialog belongs to (project/task name). Shown in the window chrome so the user can tell parallel dialogs apart." }, "message": { "type": "string", "description": "One sentence stating the concrete consequence." }, "header": { "type": "string", "description": "Short chip above the title (≤ 14 chars)." }, "destructive": { "type": "boolean", "default": false, "description": "Red confirm button — for deletions/rollbacks only." }, @@ -300,6 +301,7 @@ fn tools_list() -> Value { "type": "object", "required": ["question", "options"], "properties": { + "session": { "type": "string", "description": "Optional short human label for the session this dialog belongs to (project/task name). Shown in the window chrome so the user can tell parallel dialogs apart." }, "question": { "type": "string", "description": "Full question, imperative or interrogative." }, "options": { "type": "array", @@ -327,6 +329,7 @@ fn tools_list() -> Value { "type": "object", "required": ["title"], "properties": { + "session": { "type": "string", "description": "Optional short human label for the session this dialog belongs to (project/task name). Shown in the window chrome so the user can tell parallel dialogs apart." }, "title": { "type": "string" }, "fields": { "type": "array", "items": { "type": "object" }, "description": "Flat field list. Use this OR `tabs`, not both." }, "tabs": { @@ -533,6 +536,7 @@ async fn tools_call( "cancelLabel": args.get("cancel_label"), "image": args.get("image") }), + args.get("session").and_then(|v| v.as_str()).map(String::from), cfg, http, ) @@ -550,6 +554,7 @@ async fn tools_call( "multiSelect": args.get("multi_select").and_then(|v| v.as_bool()).unwrap_or(false), "allowOther": args.get("allow_other").and_then(|v| v.as_bool()).unwrap_or(false) }), + args.get("session").and_then(|v| v.as_str()).map(String::from), cfg, http, ) @@ -570,6 +575,7 @@ async fn tools_call( "submitLabel": args.get("submit_label"), "cancelLabel": args.get("cancel_label") }), + args.get("session").and_then(|v| v.as_str()).map(String::from), cfg, http, ) @@ -639,6 +645,7 @@ enum RenderError { async fn render_dialog( spec: Value, + session: Option, cfg: &AppConfig, http: &reqwest::Client, ) -> Result { @@ -654,7 +661,10 @@ async fn render_dialog( // would never see the remote's filesystem. let mut spec = spec; crate::imageresolve::resolve_local_paths(&mut spec); - let body = json!({ "spec": spec }); + // Step 4 (I8): forward the optional caller `session` label. This is the + // local bridge, so there is no `session_origin` (the companion treats an + // absent origin as local). + let body = json!({ "spec": spec, "session": session }); // Async render (Step 3): POST opts in via `x-aiui-async`; the companion // registers + surfaces the dialog and returns immediately with // `{id, ttl_secs}` (202). We then poll `GET /render/{id}` in bounded diff --git a/companion/src/i18n/de.json b/companion/src/i18n/de.json index 045f44c..65f58bb 100644 --- a/companion/src/i18n/de.json +++ b/companion/src/i18n/de.json @@ -84,6 +84,7 @@ "confirm.no": "Nein", "unknown_kind": "Unbekannter Widget-Typ: {kind}", "close": "Schließen", + "session_aria": "Sitzung, zu der dieser Dialog gehört", "ttl": { "yellow": "Noch ca. {countdown} Min zum Abschicken — danach werden die Eingaben automatisch verworfen.", "red": "Weniger als {countdown} bis zum automatischen Abbruch. Bitte jetzt abschicken oder abbrechen.", diff --git a/companion/src/i18n/en.json b/companion/src/i18n/en.json index 5a697b7..6f160fe 100644 --- a/companion/src/i18n/en.json +++ b/companion/src/i18n/en.json @@ -84,6 +84,7 @@ "confirm.no": "No", "unknown_kind": "Unknown widget kind: {kind}", "close": "Close", + "session_aria": "Session this dialog belongs to", "ttl": { "yellow": "About {countdown} min left to submit — after that your input will be discarded automatically.", "red": "Less than {countdown} until auto-cancel. Please submit or cancel now.", diff --git a/companion/src/lib/DialogShell.svelte b/companion/src/lib/DialogShell.svelte index ecabdd8..fc90900 100644 --- a/companion/src/lib/DialogShell.svelte +++ b/companion/src/lib/DialogShell.svelte @@ -1,13 +1,22 @@ + +
+
+ {#if spec.header}{spec.header}{/if} + {#if spec.title}

{spec.title}

{/if} + {#if spec.description}

{spec.description}

{/if} + + +
+ +
+ {$_("dialog.gallery.decided", { values: { n: decidedCount, total: spec.items.length } })} + + +
+
+ + diff --git a/docs/skill.md b/docs/skill.md index 8a3107f..bca5d3a 100644 --- a/docs/skill.md +++ b/docs/skill.md @@ -65,6 +65,7 @@ Skip the dialog for content the user reads, doesn't answer: | Pick one of N images ("A or B or C") | `ask` with `thumbnail` per option | | Multi-field input, multi-action footer | `form` | | Pick one of *many* images (e.g. 12 logo variants) | `form` with `image_grid` | +| Per-item verdict on a *batch* of images/videos ("approve/revise/skip each") | `gallery` | | Single free-text answer | just ask in chat | | More than 8 fields | split into multiple `form` calls; do not cram one dialog | @@ -237,6 +238,34 @@ thumbnail candidates, asset triage. Spec: `images: [{value, src, label?}]`, `multi_select?`, `columns?` (default 3). Result: `{selected: [values]}`. Each `src` follows the same rules as `image` — see below. +`image_grid` is a *picker* — one (or N) selected out of many. When you +instead need a **separate verdict per item** — approve this, revise that, +skip the third, with an optional note each — use the `gallery` tool below. + +## Batch review: `gallery` + +A standalone tool (not a `form` field), for reviewing a *batch* of images +and/or videos and collecting one decision per item in a single window — +instead of firing `confirm` once per asset. + +Spec: `items: [{value, src?, label?, detail?, max_height?}]`, +`actions?` (per-item buttons, default Approve / Revise / Skip), +`comment?` (free-text field per item), `columns?` (default responsive). +Each item's `value` must be non-empty and unique — it keys the result. +`src` follows the same resolution rules as `image`; **videos** (a +`data:video/` URL or a `.mp4`/`.mov`/`.m4v`/`.webm` source) render with +native `