From 0284a26a83536e0650c0039e2d12013dd2a077aa Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 23 Mar 2026 02:10:51 +0000 Subject: [PATCH 01/32] Initial task doc --- .../task-x-quick-entry-ai-processing.md | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 docs/tasks-todo/task-x-quick-entry-ai-processing.md diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md new file mode 100644 index 00000000..ba6d91e6 --- /dev/null +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -0,0 +1,179 @@ +# Quick Entry AI Processing (Smart Dictation) + +**GitHub Issue:** https://github.com/dannysmith/taskdn/issues/30 +**Product:** tdn-desktop only + +## Overview + +Add AI-powered processing to the quick entry pane so users can dictate or type free-form natural language (e.g. "Create a new task in the Jengu project with a due date three weeks from now to review the meeting notes") and have it intelligently parsed into structured task fields (title, body, project, area, dates, status). + +This does not involve voice-to-text transcription — we assume users have a transcription tool (e.g. macOS dictation). This is about taking transcribed/typed text and intelligently populating the quick entry form so the user can review and confirm before saving. + +## Requirements + +### From the GitHub Issue + +- The contents of the title input field are sent to a local LLM with a short prompt, which returns structured task data for pre-populating the form. +- The prompt includes a list of current areas and projects, context about "now" (today's date), and instructions for lightly cleaning user input, extracting frontmatter fields, and generating a suitable title. +- The raw input text is always included in the body of the task doc. +- The prompt is not user-customizable. +- V1 supports only Apple Intelligence. +- There is no intent to ship downloadable LLMs or provide an interface for managing them for now. + +### Product Requirements (from discussion) + +- **Trigger:** Explicit keyboard shortcut (`Cmd+Shift+A`) + a visible button in the UI. The shortcut is only active when the quick entry pane is visible. +- **UX flow:** User opens quick pane → types/dictates free-form text → triggers AI processing → form fields are populated → user reviews and saves normally. +- **Body behavior:** The raw dictated/typed text is preserved in the body field, unless the AI-generated title is identical to the raw input (in which case no body is added, since nothing was transformed). +- **Invisible when unavailable:** If Apple Intelligence is not available (wrong platform, older macOS, not enabled), the feature must be completely invisible — no button, no shortcut, no trace. It should appear as if the feature doesn't exist. +- **Future provider support:** Don't prematurely optimise for Ollama or other providers, but at decision points, prefer architecture that wouldn't make adding them painful later. Keep the Rust-level interface clean (text + context in, structured result out). + +### UI Placement + +The quick entry pane is a compact floating card with: title input (top), metadata row with status/dates (middle), footer with project/area selectors + cancel/save (bottom). The AI processing button should sit adjacent to the title input area since that's where the action happens. + +## Background: How Handy Does This + +The Handy codebase (`~/dev/handy`) has a production-grade Apple Intelligence integration for post-processing transcriptions. It provides a proven Tauri ↔ Swift bridge pattern that we should follow closely. + +### Architecture + +``` +React frontend + → Tauri command (Rust) + → C FFI (unsafe) + → Swift FoundationModels API + → On-device ~3B model inference + ← Structured response (@Generable) + ← Result + ← Populate form fields +``` + +### Key Files to Reference in Handy + +| File | What it does | +|------|-------------| +| `src-tauri/swift/apple_intelligence.swift` | Real implementation (~144 lines). `@Generable` struct, `LanguageModelSession`, structured output with plain-text fallback, `DispatchSemaphore` for async→sync bridge | +| `src-tauri/swift/apple_intelligence_stub.swift` | Stub compiled when SDK lacks FoundationModels (~46 lines) | +| `src-tauri/swift/apple_intelligence_bridge.h` | C header defining `AppleLLMResponse` struct and FFI function signatures | +| `src-tauri/src/apple_intelligence.rs` | Rust wrapper with safe abstractions over the C FFI | +| `src-tauri/build.rs` | `build_apple_intelligence_bridge()` — SDK detection via `xcrun`, `swiftc` compilation, `libtool` for static lib, weak framework linking | + +### Critical Gotchas Discovered by Handy + +1. **SIGABRT on init:** Cannot access `SystemLanguageModel.default` during app initialization on macOS 26 — must defer the availability check to runtime (when the user actually tries to use the feature). +2. **Async→sync bridge:** Swift `async/await` called from synchronous Rust FFI. Uses `DispatchSemaphore` + `Task.detached(priority: .userInitiated)` with a thread-safe `ResultBox`. +3. **Weak linking:** Must use `-weak_framework FoundationModels` so the app launches on older macOS. Deployment target is macOS 11.0 with `@available(macOS 26.0, *)` runtime checks. +4. **Invisible Unicode:** LLMs sometimes insert zero-width spaces (`\u{200B}`, `\u{200C}`, `\u{200D}`, `\u{FEFF}`) — strip them from output. +5. **Structured output fallback:** `@Generable` can fail — always have a plain-text fallback path. +6. **Build-time SDK detection:** Check for `FoundationModels.framework` in the SDK path. If absent, compile the stub instead. + +## Implementation Plan + +### Phase 1: Swift Bridge (Apple Intelligence integration layer) + +Set up the Tauri ↔ Swift FFI bridge, closely following Handy's pattern. + +**Files to create:** +- `src-tauri/swift/apple_intelligence.swift` — The `@Generable` struct for parsed tasks, inference function, availability check +- `src-tauri/swift/apple_intelligence_stub.swift` — Fallback for builds without FoundationModels SDK +- `src-tauri/swift/apple_intelligence_bridge.h` — C-compatible struct and function declarations + +**Files to modify:** +- `src-tauri/build.rs` — Add `build_apple_intelligence_bridge()` (can adapt directly from Handy's `build.rs`) + +**Key design detail — the `@Generable` struct:** + +```swift +@Generable +struct ParsedTask: Sendable { + @Guide(description: "A concise task title summarizing the request") + let title: String + + @Guide(description: "Additional context or notes, empty string if none") + let body: String + + @Guide(description: "Task status: inbox, ready, in-progress, or icebox. Default inbox.") + let status: String + + @Guide(description: "Relative date expression for due date, e.g. 'in 3 weeks', empty string if none") + let dueExpression: String + + @Guide(description: "Relative date expression for scheduled date, empty string if none") + let scheduledExpression: String + + @Guide(description: "Relative date expression for defer-until date, empty string if none") + let deferUntilExpression: String + + @Guide(description: "Exact project name from the available list, empty string if none") + let project: String + + @Guide(description: "Exact area name from the available list, empty string if none") + let area: String +} +``` + +**Key design detail — date handling:** The ~3B model is unreliable at date arithmetic. Have the LLM extract the *relative date expression* (e.g. "three weeks from now", "next Tuesday", "end of April") and do the actual date resolution in Rust. This avoids wrong dates, which would be the most user-visible error. + +**Key design detail — project/area matching:** The `@Guide(.anyOf([...]))` constraint requires compile-time values, but project/area names are dynamic per-user. Instead: list valid names in the system prompt instructions, use `@Guide(description:)` for guidance, and validate/fuzzy-match the returned name against the actual list in Rust. If no match, leave the field empty for the user to set manually. + +### Phase 2: Rust Layer (command, prompt building, response handling) + +**Files to create:** +- `src-tauri/src/apple_intelligence.rs` — Safe Rust wrapper over the C FFI (adapt from Handy) + +**Files to modify:** +- `src-tauri/src/commands/` — New Tauri command `process_quick_entry_text` +- `src-tauri/src/lib.rs` or `mod.rs` — Register the new module and command + +**The Tauri command should:** +1. Accept: raw text, list of area names+IDs, list of project names+IDs +2. Build system prompt: role description, today's date, available project/area names, formatting rules +3. Call Swift FFI with system prompt + raw text +4. Deserialize the `ParsedTask` response (JSON) +5. Resolve relative date expressions to `YYYY-MM-DD` dates +6. Match returned project/area names to actual IDs (exact match first, then fuzzy) +7. Return a typed result struct with all resolved fields + +**System prompt template (built in Rust):** + +``` +You are a task parser. Extract structured task fields from free-form input. +Today is {date} ({day_of_week}). + +Available projects: {comma-separated names} +Available areas: {comma-separated names} + +Rules: +- Create a concise, actionable title (not the raw input verbatim) +- Match project/area names exactly from the lists above, or return empty +- For dates, extract the relative expression as spoken (e.g. "in 3 weeks") +- Default status to inbox unless clearly stated otherwise +- Put any detail beyond the title into the body field +``` + +**Date resolution in Rust:** Parse natural language date expressions like "next Tuesday", "in 3 weeks", "end of April" relative to today's date. Consider using a crate like `chrono` with simple pattern matching, or a lightweight NLP date parser. This can be basic at first — cover common patterns and fall back to empty if unparseable. + +### Phase 3: Frontend Integration + +**Files to modify:** +- `src/components/quick-pane/QuickPaneApp.tsx` — Add AI processing state, handler, availability check on pane open +- `src/components/quick-pane/QuickPaneTitle.tsx` — Add the AI button adjacent to the title input (conditionally rendered) +- `src/components/quick-pane/useQuickPaneKeyboard.ts` — Add `Cmd+Shift+A` shortcut + +**Behaviour:** +1. On pane open (focus event), also call `commands.checkAppleIntelligenceAvailable()`. Store result in state. If unavailable, skip rendering button and registering shortcut. +2. When triggered (button or `Cmd+Shift+A`): grab current title text, show loading state (e.g. subtle spinner on the button, disable form briefly), call `commands.processQuickEntryText(...)`. +3. On success: populate title, body (with show-body toggled on), status, dates, project, area from the response. The body should contain the original raw text. +4. On error: leave form unchanged, optionally log the error. No toast or disruptive error UI. +5. User reviews populated fields and saves normally with `Cmd+Enter`. + +**Loading state:** Keep it minimal — a spinner or pulse animation on the AI button, lasting ~1-5 seconds. Don't disable the entire form (the user might want to cancel with Escape during processing). + +### Phase 4: Testing and Polish + +- Test with various dictation styles: short commands, long rambling input, ambiguous dates, misspelled project names, non-English input +- Test availability detection: verify feature is invisible on Intel Macs, older macOS, Apple Intelligence disabled +- Test the build on machines without the FoundationModels SDK (stub compilation) +- Test edge cases: empty input, very long input (context window), input that's already a clean title +- Consider whether re-processing should be supported (user processes, edits, processes again) From 54caa38acf6e83392e0aaeb93f2f825f537c092b Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 23 Mar 2026 02:16:06 +0000 Subject: [PATCH 02/32] Tweak task doc --- .../task-x-quick-entry-ai-processing.md | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index ba6d91e6..5485ddf2 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -93,17 +93,16 @@ struct ParsedTask: Sendable { @Guide(description: "Additional context or notes, empty string if none") let body: String - @Guide(description: "Task status: inbox, ready, in-progress, or icebox. Default inbox.") - let status: String + let status: ParsedStatus - @Guide(description: "Relative date expression for due date, e.g. 'in 3 weeks', empty string if none") - let dueExpression: String + @Guide(description: "Due date in YYYY-MM-DD format, empty string if none") + let due: String - @Guide(description: "Relative date expression for scheduled date, empty string if none") - let scheduledExpression: String + @Guide(description: "Scheduled date in YYYY-MM-DD format, empty string if none") + let scheduled: String - @Guide(description: "Relative date expression for defer-until date, empty string if none") - let deferUntilExpression: String + @Guide(description: "Defer-until date in YYYY-MM-DD format, empty string if none") + let deferUntil: String @Guide(description: "Exact project name from the available list, empty string if none") let project: String @@ -111,11 +110,22 @@ struct ParsedTask: Sendable { @Guide(description: "Exact area name from the available list, empty string if none") let area: String } + +@Generable +enum ParsedStatus { + case inbox + case icebox + case ready + case inProgress + case blocked +} ``` -**Key design detail — date handling:** The ~3B model is unreliable at date arithmetic. Have the LLM extract the *relative date expression* (e.g. "three weeks from now", "next Tuesday", "end of April") and do the actual date resolution in Rust. This avoids wrong dates, which would be the most user-visible error. +**Key design detail — status as enum:** Task statuses are known at compile time, so using a `@Generable enum` gives us constrained decoding for free — the model literally cannot output an invalid status. The enum omits `done` and `dropped` since those don't make sense for newly-created tasks. -**Key design detail — project/area matching:** The `@Guide(.anyOf([...]))` constraint requires compile-time values, but project/area names are dynamic per-user. Instead: list valid names in the system prompt instructions, use `@Guide(description:)` for guidance, and validate/fuzzy-match the returned name against the actual list in Rust. If no match, leave the field empty for the user to set manually. +**Key design detail — date handling:** The system prompt includes today's date and day of week. The LLM outputs dates directly in `YYYY-MM-DD` format. The ~3B model should handle common relative date arithmetic ("in 3 weeks", "next Tuesday", "end of April") well enough given today's date as context. If it occasionally gets a date wrong, the user corrects it during the review step — this is no worse than an empty field. Rust validates that returned date strings are valid `YYYY-MM-DD` and discards any that aren't. + +**Key design detail — project/area matching:** The `@Guide(.anyOf([...]))` constraint requires compile-time values, but project/area names are dynamic per-user. Instead: list valid names in the system prompt instructions and use `@Guide(description:)` for guidance. In Rust, validate the returned name against the actual list using case-insensitive exact match. If no match, leave the field empty for the user to set manually. ### Phase 2: Rust Layer (command, prompt building, response handling) @@ -128,11 +138,11 @@ struct ParsedTask: Sendable { **The Tauri command should:** 1. Accept: raw text, list of area names+IDs, list of project names+IDs -2. Build system prompt: role description, today's date, available project/area names, formatting rules +2. Build system prompt: role description, today's date + day of week, available project/area names, formatting rules 3. Call Swift FFI with system prompt + raw text 4. Deserialize the `ParsedTask` response (JSON) -5. Resolve relative date expressions to `YYYY-MM-DD` dates -6. Match returned project/area names to actual IDs (exact match first, then fuzzy) +5. Validate date strings are valid `YYYY-MM-DD` (discard invalid ones) +6. Match returned project/area names to actual IDs (case-insensitive exact match; no match = empty) 7. Return a typed result struct with all resolved fields **System prompt template (built in Rust):** @@ -147,13 +157,11 @@ Available areas: {comma-separated names} Rules: - Create a concise, actionable title (not the raw input verbatim) - Match project/area names exactly from the lists above, or return empty -- For dates, extract the relative expression as spoken (e.g. "in 3 weeks") +- Convert any relative dates to YYYY-MM-DD format based on today's date - Default status to inbox unless clearly stated otherwise - Put any detail beyond the title into the body field ``` -**Date resolution in Rust:** Parse natural language date expressions like "next Tuesday", "in 3 weeks", "end of April" relative to today's date. Consider using a crate like `chrono` with simple pattern matching, or a lightweight NLP date parser. This can be basic at first — cover common patterns and fall back to empty if unparseable. - ### Phase 3: Frontend Integration **Files to modify:** From d807d3e8976f6e003775c72fefa5106ad6950c08 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 23 Mar 2026 02:33:30 +0000 Subject: [PATCH 03/32] Add AI-powered quick entry processing via Apple Intelligence (#30) Adds on-device AI processing to the quick capture pane so users can dictate or type free-form text and have it parsed into structured task fields (title, body, status, dates, project, area). Uses Apple's Foundation Models framework (~3B on-device model) via a Swift FFI bridge. - Swift bridge with @Generable struct for constrained structured output - Rust wrapper with system prompt builder, date validation, and case-insensitive project/area name matching - Sparkles button in title row (visible only when AI available + text entered) - Cmd+Shift+A keyboard shortcut (only active when pane is open) - Feature is completely invisible on non-Apple-Silicon/older macOS - Weak-links FoundationModels so app still launches on older systems - Stub compilation when SDK lacks FoundationModels framework Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/build.rs | 137 +++++++++++ .../src-tauri/src/apple_intelligence.rs | 73 ++++++ tdn-desktop/src-tauri/src/bindings.rs | 5 +- tdn-desktop/src-tauri/src/commands/ai.rs | 223 ++++++++++++++++++ tdn-desktop/src-tauri/src/commands/mod.rs | 1 + tdn-desktop/src-tauri/src/lib.rs | 3 + .../src-tauri/swift/apple_intelligence.swift | 206 ++++++++++++++++ .../swift/apple_intelligence_bridge.h | 27 +++ .../swift/apple_intelligence_stub.swift | 38 +++ .../components/quick-pane/QuickPaneApp.tsx | 79 ++++++- .../components/quick-pane/QuickPaneTitle.tsx | 65 ++++- .../quick-pane/useQuickPaneKeyboard.ts | 12 + tdn-desktop/src/lib/bindings.ts | 36 +++ 13 files changed, 901 insertions(+), 4 deletions(-) create mode 100644 tdn-desktop/src-tauri/src/apple_intelligence.rs create mode 100644 tdn-desktop/src-tauri/src/commands/ai.rs create mode 100644 tdn-desktop/src-tauri/swift/apple_intelligence.swift create mode 100644 tdn-desktop/src-tauri/swift/apple_intelligence_bridge.h create mode 100644 tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift diff --git a/tdn-desktop/src-tauri/build.rs b/tdn-desktop/src-tauri/build.rs index d860e1e6..6ad7bfbd 100644 --- a/tdn-desktop/src-tauri/build.rs +++ b/tdn-desktop/src-tauri/build.rs @@ -1,3 +1,140 @@ fn main() { + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + build_apple_intelligence_bridge(); + tauri_build::build() } + +/// Build the Swift ↔ Rust bridge for Apple Intelligence. +/// +/// Detects whether the current Xcode SDK includes the FoundationModels framework. +/// If it does, compiles the real implementation; otherwise compiles a stub that +/// returns "unavailable" for all calls. +/// +/// The app uses weak linking for FoundationModels so it can launch on older macOS +/// versions — runtime availability is checked via @available(macOS 26.0, *). +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn build_apple_intelligence_bridge() { + use std::path::{Path, PathBuf}; + use std::process::Command; + + const REAL_SWIFT_FILE: &str = "swift/apple_intelligence.swift"; + const STUB_SWIFT_FILE: &str = "swift/apple_intelligence_stub.swift"; + const BRIDGE_HEADER: &str = "swift/apple_intelligence_bridge.h"; + + println!("cargo:rerun-if-changed={REAL_SWIFT_FILE}"); + println!("cargo:rerun-if-changed={STUB_SWIFT_FILE}"); + println!("cargo:rerun-if-changed={BRIDGE_HEADER}"); + + let out_dir = PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR not set")); + let object_path = out_dir.join("apple_intelligence.o"); + let static_lib_path = out_dir.join("libapple_intelligence.a"); + + let sdk_path = String::from_utf8( + Command::new("xcrun") + .args(["--sdk", "macosx", "--show-sdk-path"]) + .output() + .expect("Failed to locate macOS SDK") + .stdout, + ) + .expect("SDK path is not valid UTF-8") + .trim() + .to_string(); + + // Check if the SDK supports FoundationModels (required for Apple Intelligence) + let framework_path = + Path::new(&sdk_path).join("System/Library/Frameworks/FoundationModels.framework"); + let has_foundation_models = framework_path.exists(); + + let source_file = if has_foundation_models { + println!("cargo:warning=Building with Apple Intelligence support."); + REAL_SWIFT_FILE + } else { + println!("cargo:warning=Apple Intelligence SDK not found. Building with stubs."); + STUB_SWIFT_FILE + }; + + if !Path::new(source_file).exists() { + panic!("Source file {source_file} is missing!"); + } + + let swiftc_path = String::from_utf8( + Command::new("xcrun") + .args(["--find", "swiftc"]) + .output() + .expect("Failed to locate swiftc") + .stdout, + ) + .expect("swiftc path is not valid UTF-8") + .trim() + .to_string(); + + let toolchain_swift_lib = Path::new(&swiftc_path) + .parent() + .and_then(|p| p.parent()) + .map(|root| root.join("lib/swift/macosx")) + .expect("Unable to determine Swift toolchain lib directory"); + let sdk_swift_lib = Path::new(&sdk_path).join("usr/lib/swift"); + + // Use macOS 11.0 as deployment target for compatibility. + // The @available(macOS 26.0, *) checks in Swift handle runtime availability. + // Weak linking for FoundationModels is handled via cargo:rustc-link-arg below. + let status = Command::new("xcrun") + .args([ + "swiftc", + "-target", + "arm64-apple-macosx11.0", + "-sdk", + &sdk_path, + "-O", + "-import-objc-header", + BRIDGE_HEADER, + "-c", + source_file, + "-o", + object_path + .to_str() + .expect("Failed to convert object path to string"), + ]) + .status() + .expect("Failed to invoke swiftc for Apple Intelligence bridge"); + + if !status.success() { + panic!("swiftc failed to compile {source_file}"); + } + + let status = Command::new("libtool") + .args([ + "-static", + "-o", + static_lib_path + .to_str() + .expect("Failed to convert static lib path to string"), + object_path + .to_str() + .expect("Failed to convert object path to string"), + ]) + .status() + .expect("Failed to create static library for Apple Intelligence bridge"); + + if !status.success() { + panic!("libtool failed for Apple Intelligence bridge"); + } + + println!("cargo:rustc-link-search=native={}", out_dir.display()); + println!("cargo:rustc-link-lib=static=apple_intelligence"); + println!( + "cargo:rustc-link-search=native={}", + toolchain_swift_lib.display() + ); + println!("cargo:rustc-link-search=native={}", sdk_swift_lib.display()); + println!("cargo:rustc-link-lib=framework=Foundation"); + + if has_foundation_models { + // Use weak linking so the app can launch on systems without FoundationModels + println!("cargo:rustc-link-arg=-weak_framework"); + println!("cargo:rustc-link-arg=FoundationModels"); + } + + println!("cargo:rustc-link-arg=-Wl,-rpath,/usr/lib/swift"); +} diff --git a/tdn-desktop/src-tauri/src/apple_intelligence.rs b/tdn-desktop/src-tauri/src/apple_intelligence.rs new file mode 100644 index 00000000..6984048d --- /dev/null +++ b/tdn-desktop/src-tauri/src/apple_intelligence.rs @@ -0,0 +1,73 @@ +//! Safe Rust wrapper over the Apple Intelligence Swift FFI bridge. +//! +//! On macOS ARM64, this links to Swift functions that call Apple's +//! FoundationModels framework. On other platforms, these functions +//! are not available and the module is not compiled. + +use std::ffi::{CStr, CString}; +use std::os::raw::{c_char, c_int}; + +/// C-compatible response structure from Swift. +#[repr(C)] +pub struct AppleLLMResponse { + pub response: *mut c_char, + pub success: c_int, + pub error_message: *mut c_char, +} + +extern "C" { + pub fn is_apple_intelligence_available() -> c_int; + pub fn process_text_with_system_prompt_apple( + system_prompt: *const c_char, + user_content: *const c_char, + max_tokens: i32, + ) -> *mut AppleLLMResponse; + pub fn free_apple_llm_response(response: *mut AppleLLMResponse); +} + +/// Check if Apple Intelligence is available on this device. +pub fn check_availability() -> bool { + unsafe { is_apple_intelligence_available() == 1 } +} + +/// Process text with Apple Intelligence using a system prompt and user content. +/// Returns the model's response as a string, or an error message. +pub fn process_text( + system_prompt: &str, + user_content: &str, + max_tokens: i32, +) -> Result { + let system_cstr = CString::new(system_prompt).map_err(|e| e.to_string())?; + let user_cstr = CString::new(user_content).map_err(|e| e.to_string())?; + + let response_ptr = unsafe { + process_text_with_system_prompt_apple(system_cstr.as_ptr(), user_cstr.as_ptr(), max_tokens) + }; + + if response_ptr.is_null() { + return Err("Null response from Apple LLM".to_string()); + } + + let response = unsafe { &*response_ptr }; + + let result = if response.success == 1 { + if response.response.is_null() { + Ok(String::new()) + } else { + let c_str = unsafe { CStr::from_ptr(response.response) }; + Ok(c_str.to_string_lossy().into_owned()) + } + } else { + let error_msg = if !response.error_message.is_null() { + let c_str = unsafe { CStr::from_ptr(response.error_message) }; + c_str.to_string_lossy().into_owned() + } else { + "Unknown error".to_string() + }; + Err(error_msg) + }; + + unsafe { free_apple_llm_response(response_ptr) }; + + result +} diff --git a/tdn-desktop/src-tauri/src/bindings.rs b/tdn-desktop/src-tauri/src/bindings.rs index a3b14ff1..20d75cf8 100644 --- a/tdn-desktop/src-tauri/src/bindings.rs +++ b/tdn-desktop/src-tauri/src/bindings.rs @@ -1,7 +1,7 @@ use tauri_specta::{collect_commands, Builder}; pub fn generate_bindings() -> Builder { - use crate::commands::{config, notifications, preferences, quick_pane, recovery, vault}; + use crate::commands::{ai, config, notifications, preferences, quick_pane, recovery, vault}; Builder::::new().commands(collect_commands![ preferences::greet, @@ -37,6 +37,9 @@ pub fn generate_bindings() -> Builder { vault::update_project, vault::delete_task, vault::get_entity_raw_content, + // AI commands + ai::check_apple_intelligence_available, + ai::process_quick_entry_text, ]) } diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs new file mode 100644 index 00000000..7fe04b04 --- /dev/null +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -0,0 +1,223 @@ +//! Tauri commands for Apple Intelligence integration. +//! +//! Provides AI-powered processing of free-form text input in the quick entry pane, +//! parsing dictated/typed text into structured task fields. + +use serde::{Deserialize, Serialize}; +use specta::Type; + +/// Result of AI-processing free-form text into structured task fields. +#[derive(Debug, Clone, Serialize, Deserialize, Type)] +#[serde(rename_all = "camelCase")] +pub struct ParsedQuickEntry { + pub title: String, + pub body: String, + pub status: String, + pub due: Option, + pub scheduled: Option, + pub defer_until: Option, + /// Matched project ID (if a project name was recognised) + pub project_id: Option, + /// Matched area ID (if an area name was recognised) + pub area_id: Option, +} + +/// A name+ID pair for passing project/area context to the AI processor. +#[derive(Debug, Clone, Deserialize, Type)] +#[serde(rename_all = "camelCase")] +pub struct NameIdPair { + pub id: String, + pub name: String, +} + +/// Check if Apple Intelligence is available on this device. +#[tauri::command] +#[specta::specta] +pub fn check_apple_intelligence_available() -> bool { + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + crate::apple_intelligence::check_availability() + } + #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))] + { + false + } +} + +/// Process free-form text input using Apple Intelligence to extract structured task fields. +/// +/// Takes the raw text from the quick entry title field, plus lists of available +/// projects and areas for context, and returns a parsed result with all fields populated. +#[tauri::command] +#[specta::specta] +pub fn process_quick_entry_text( + text: String, + projects: Vec, + areas: Vec, +) -> Result { + let trimmed = text.trim(); + if trimmed.is_empty() { + return Err("No text to process".to_string()); + } + + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + let system_prompt = build_system_prompt(&projects, &areas); + let response = crate::apple_intelligence::process_text(&system_prompt, trimmed, 0)?; + parse_ai_response(&response, trimmed, &projects, &areas) + } + + #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))] + { + let _ = (projects, areas); + Err("Apple Intelligence is not available on this platform".to_string()) + } +} + +/// Build the system prompt with today's date and available projects/areas. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn build_system_prompt(projects: &[NameIdPair], areas: &[NameIdPair]) -> String { + let today = chrono::Local::now(); + let date_str = today.format("%Y-%m-%d").to_string(); + let day_of_week = today.format("%A").to_string(); + + let project_names: Vec<&str> = projects.iter().map(|p| p.name.as_str()).collect(); + let area_names: Vec<&str> = areas.iter().map(|a| a.name.as_str()).collect(); + + let projects_list = if project_names.is_empty() { + "(none)".to_string() + } else { + project_names.join(", ") + }; + + let areas_list = if area_names.is_empty() { + "(none)".to_string() + } else { + area_names.join(", ") + }; + + format!( + "You are a task parser. Extract structured task fields from free-form input.\n\ + Today is {date_str} ({day_of_week}).\n\ + \n\ + Available projects: {projects_list}\n\ + Available areas: {areas_list}\n\ + \n\ + Rules:\n\ + - Create a concise, actionable title (not the raw input verbatim)\n\ + - Match project/area names exactly from the lists above, or return empty string\n\ + - Convert any relative dates to YYYY-MM-DD format based on today's date\n\ + - Default status to inbox unless clearly stated otherwise\n\ + - Put any detail beyond the title into the body field" + ) +} + +/// Parse the AI response JSON into a `ParsedQuickEntry`, resolving project/area names to IDs. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn parse_ai_response( + response: &str, + original_text: &str, + projects: &[NameIdPair], + areas: &[NameIdPair], +) -> Result { + // Try to parse as JSON (structured output from @Generable) + if let Ok(parsed) = serde_json::from_str::(response) { + let title = parsed["title"] + .as_str() + .unwrap_or(original_text) + .trim() + .to_string(); + + let body_from_ai = parsed["body"].as_str().unwrap_or("").trim().to_string(); + + // Determine body: include original text unless title is identical to input + let body = if title.eq_ignore_ascii_case(original_text.trim()) { + // Title is the same as input — no need to duplicate in body + body_from_ai + } else { + // Title was transformed — preserve original text in body + if body_from_ai.is_empty() { + original_text.trim().to_string() + } else { + format!("{}\n\n{}", original_text.trim(), body_from_ai) + } + }; + + let status = parsed["status"] + .as_str() + .unwrap_or("inbox") + .trim() + .to_string(); + + // Validate status is a known value + let status = match status.as_str() { + "inbox" | "icebox" | "ready" | "in-progress" | "blocked" => status, + _ => "inbox".to_string(), + }; + + let due = non_empty_date(parsed["due"].as_str()); + let scheduled = non_empty_date(parsed["scheduled"].as_str()); + let defer_until = non_empty_date(parsed["deferUntil"].as_str()); + + // Match project name to ID (case-insensitive exact match) + let project_name = parsed["project"].as_str().unwrap_or("").trim(); + let project_id = match_name_to_id(project_name, projects); + + // Match area name to ID (case-insensitive exact match) + let area_name = parsed["area"].as_str().unwrap_or("").trim(); + let area_id = match_name_to_id(area_name, areas); + + Ok(ParsedQuickEntry { + title, + body, + status, + due, + scheduled, + defer_until, + project_id, + area_id, + }) + } else { + // Fallback: structured output failed, model returned plain text. + // Use the original text as title and the AI response as body context. + log::warn!("AI returned non-JSON response, using fallback parsing"); + Ok(ParsedQuickEntry { + title: original_text.trim().to_string(), + body: response.trim().to_string(), + status: "inbox".to_string(), + due: None, + scheduled: None, + defer_until: None, + project_id: None, + area_id: None, + }) + } +} + +/// Validate a date string is in YYYY-MM-DD format and return Some, or None if empty/invalid. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn non_empty_date(s: Option<&str>) -> Option { + let s = s?.trim(); + if s.is_empty() { + return None; + } + // Validate YYYY-MM-DD format + if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok() { + Some(s.to_string()) + } else { + log::warn!("AI returned invalid date format: {s}"); + None + } +} + +/// Case-insensitive exact match of a name to an ID from a list of name/ID pairs. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { + if name.is_empty() { + return None; + } + pairs + .iter() + .find(|p| p.name.eq_ignore_ascii_case(name)) + .map(|p| p.id.clone()) +} diff --git a/tdn-desktop/src-tauri/src/commands/mod.rs b/tdn-desktop/src-tauri/src/commands/mod.rs index 9dc94f58..02bcb1f3 100644 --- a/tdn-desktop/src-tauri/src/commands/mod.rs +++ b/tdn-desktop/src-tauri/src/commands/mod.rs @@ -3,6 +3,7 @@ //! Each submodule contains related commands and their helper functions. //! Import specific commands via their submodule (e.g., `commands::preferences::greet`). +pub mod ai; pub mod config; pub mod notifications; pub mod preferences; diff --git a/tdn-desktop/src-tauri/src/lib.rs b/tdn-desktop/src-tauri/src/lib.rs index f5f15660..4f60d201 100644 --- a/tdn-desktop/src-tauri/src/lib.rs +++ b/tdn-desktop/src-tauri/src/lib.rs @@ -10,6 +10,9 @@ mod types; mod utils; pub mod vault; +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod apple_intelligence; + use std::error::Error; use tauri::{App, AppHandle, Manager, RunEvent, WindowEvent}; use vault::VaultManager; diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift new file mode 100644 index 00000000..0e68bf4a --- /dev/null +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -0,0 +1,206 @@ +import Dispatch +import Foundation +import FoundationModels + +// MARK: - Generable types for structured task parsing + +@available(macOS 26.0, *) +@Generable +private struct ParsedTask: Sendable { + @Guide(description: "A concise task title summarizing the request") + let title: String + + @Guide(description: "Additional context or notes, empty string if none") + let body: String + + let status: ParsedStatus + + @Guide(description: "Due date in YYYY-MM-DD format, empty string if none") + let due: String + + @Guide(description: "Scheduled date in YYYY-MM-DD format, empty string if none") + let scheduled: String + + @Guide(description: "Defer-until date in YYYY-MM-DD format, empty string if none") + let deferUntil: String + + @Guide(description: "Exact project name from the available list, empty string if none") + let project: String + + @Guide(description: "Exact area name from the available list, empty string if none") + let area: String +} + +@available(macOS 26.0, *) +@Generable +private enum ParsedStatus: Sendable { + case inbox + case icebox + case ready + case inProgress + case blocked +} + +// MARK: - Helpers + +private typealias ResponsePointer = UnsafeMutablePointer + +private func duplicateCString(_ text: String) -> UnsafeMutablePointer? { + return text.withCString { basePointer in + guard let duplicated = strdup(basePointer) else { return nil } + return duplicated + } +} + +/// Strip invisible Unicode characters that LLMs sometimes insert. +private func stripInvisibleChars(_ text: String) -> String { + return text.replacingOccurrences(of: "\u{200B}", with: "") // zero-width space + .replacingOccurrences(of: "\u{200C}", with: "") // zero-width non-joiner + .replacingOccurrences(of: "\u{200D}", with: "") // zero-width joiner + .replacingOccurrences(of: "\u{FEFF}", with: "") // BOM +} + +// MARK: - Convert ParsedStatus to string + +@available(macOS 26.0, *) +private func statusToString(_ status: ParsedStatus) -> String { + switch status { + case .inbox: return "inbox" + case .icebox: return "icebox" + case .ready: return "ready" + case .inProgress: return "in-progress" + case .blocked: return "blocked" + } +} + +// MARK: - Convert ParsedTask to JSON string + +@available(macOS 26.0, *) +private func parsedTaskToJSON(_ task: ParsedTask) -> String { + // Build JSON manually to avoid Codable complexity with @Generable + let fields: [(String, String)] = [ + ("title", task.title), + ("body", task.body), + ("status", statusToString(task.status)), + ("due", task.due), + ("scheduled", task.scheduled), + ("deferUntil", task.deferUntil), + ("project", task.project), + ("area", task.area), + ] + + let pairs = fields.map { (key, value) in + let escaped = value + .replacingOccurrences(of: "\\", with: "\\\\") + .replacingOccurrences(of: "\"", with: "\\\"") + .replacingOccurrences(of: "\n", with: "\\n") + .replacingOccurrences(of: "\r", with: "\\r") + .replacingOccurrences(of: "\t", with: "\\t") + return "\"\(key)\":\"\(escaped)\"" + } + + return "{\(pairs.joined(separator: ","))}" +} + +// MARK: - Public C-callable functions + +@_cdecl("is_apple_intelligence_available") +public func isAppleIntelligenceAvailable() -> Int32 { + guard #available(macOS 26.0, *) else { + return 0 + } + + let model = SystemLanguageModel.default + switch model.availability { + case .available: + return 1 + case .unavailable: + return 0 + } +} + +@_cdecl("process_text_with_system_prompt_apple") +public func processTextWithSystemPrompt( + _ systemPrompt: UnsafePointer, + _ userContent: UnsafePointer, + maxTokens: Int32 +) -> UnsafeMutablePointer { + let swiftSystemPrompt = String(cString: systemPrompt) + let swiftUserContent = String(cString: userContent) + let responsePtr = ResponsePointer.allocate(capacity: 1) + responsePtr.initialize(to: AppleLLMResponse(response: nil, success: 0, error_message: nil)) + + guard #available(macOS 26.0, *) else { + responsePtr.pointee.error_message = duplicateCString( + "Apple Intelligence requires macOS 26 or newer." + ) + return responsePtr + } + + let model = SystemLanguageModel.default + guard model.availability == .available else { + responsePtr.pointee.error_message = duplicateCString( + "Apple Intelligence is not currently available on this device." + ) + return responsePtr + } + + let semaphore = DispatchSemaphore(value: 0) + + final class ResultBox: @unchecked Sendable { + var response: String? + var error: String? + } + let box = ResultBox() + + Task.detached(priority: .userInitiated) { + defer { semaphore.signal() } + do { + let session = LanguageModelSession( + model: model, + instructions: swiftSystemPrompt + ) + + // Try structured output first + do { + let structured = try await session.respond( + to: swiftUserContent, + generating: ParsedTask.self + ) + let json = parsedTaskToJSON(structured.content) + box.response = stripInvisibleChars(json) + } catch { + // Fall back to plain text response + let fallback = try await session.respond(to: swiftUserContent) + box.response = stripInvisibleChars(fallback.content) + } + } catch { + box.error = error.localizedDescription + } + } + + semaphore.wait() + + if let response = box.response { + responsePtr.pointee.response = duplicateCString(response) + responsePtr.pointee.success = 1 + } else { + responsePtr.pointee.error_message = duplicateCString(box.error ?? "Unknown error") + } + + return responsePtr +} + +@_cdecl("free_apple_llm_response") +public func freeAppleLLMResponse(_ response: UnsafeMutablePointer?) { + guard let response = response else { return } + + if let responseStr = response.pointee.response { + free(UnsafeMutablePointer(mutating: responseStr)) + } + if let errorStr = response.pointee.error_message { + free(UnsafeMutablePointer(mutating: errorStr)) + } + + response.deallocate() +} diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence_bridge.h b/tdn-desktop/src-tauri/swift/apple_intelligence_bridge.h new file mode 100644 index 00000000..dd8b407e --- /dev/null +++ b/tdn-desktop/src-tauri/swift/apple_intelligence_bridge.h @@ -0,0 +1,27 @@ +#ifndef apple_intelligence_bridge_h +#define apple_intelligence_bridge_h + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char* response; + int success; // 0 for failure, 1 for success + char* error_message; // Only valid when success = 0 +} AppleLLMResponse; + +// Check if Apple Intelligence is available on the device +int is_apple_intelligence_available(void); + +// Process text using Apple's on-device LLM with separate system prompt and user content +AppleLLMResponse* process_text_with_system_prompt_apple(const char* system_prompt, const char* user_content, int max_tokens); + +// Free memory allocated by the Apple LLM response +void free_apple_llm_response(AppleLLMResponse* response); + +#ifdef __cplusplus +} +#endif + +#endif /* apple_intelligence_bridge_h */ diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift b/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift new file mode 100644 index 00000000..ecfee351 --- /dev/null +++ b/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift @@ -0,0 +1,38 @@ +import Foundation + +// Stub implementation when FoundationModels is not available. +// Compiled via Cargo build script when the build environment +// does not support Apple Intelligence (e.g. older Xcode/SDK). + +private typealias ResponsePointer = UnsafeMutablePointer + +@_cdecl("is_apple_intelligence_available") +public func isAppleIntelligenceAvailable() -> Int32 { + return 0 +} + +@_cdecl("process_text_with_system_prompt_apple") +public func processTextWithSystemPrompt( + _ systemPrompt: UnsafePointer, + _ userContent: UnsafePointer, + maxTokens: Int32 +) -> UnsafeMutablePointer { + let responsePtr = ResponsePointer.allocate(capacity: 1) + responsePtr.initialize(to: AppleLLMResponse(response: nil, success: 0, error_message: nil)) + responsePtr.pointee.error_message = strdup("Apple Intelligence is not available in this build (SDK requirement not met).") + return responsePtr +} + +@_cdecl("free_apple_llm_response") +public func freeAppleLLMResponse(_ response: UnsafeMutablePointer?) { + guard let response = response else { return } + + if let responseStr = response.pointee.response { + free(UnsafeMutablePointer(mutating: responseStr)) + } + if let errorStr = response.pointee.error_message { + free(UnsafeMutablePointer(mutating: errorStr)) + } + + response.deallocate() +} diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index a58d69ab..69f8dce4 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -33,6 +33,7 @@ const SHORTCUTS = { openDue: parseShortcut('Shift+CmdOrCtrl+D'), openDefer: parseShortcut('Ctrl+Shift+CmdOrCtrl+D'), openStatus: parseShortcut('CmdOrCtrl+S'), + processWithAI: parseShortcut('Shift+CmdOrCtrl+A'), } // ───────────────────────────────────────────────────────────────────────────── @@ -117,6 +118,8 @@ export default function QuickPaneApp() { const [exiting, setExiting] = React.useState(false) const [isSubmitting, setIsSubmitting] = React.useState(false) + const [isProcessingAI, setIsProcessingAI] = React.useState(false) + const [aiAvailable, setAiAvailable] = React.useState(false) const [openPopover, setOpenPopover] = React.useState(null) const [restoreFocusTo, setRestoreFocusTo] = React.useState(null) @@ -234,6 +237,71 @@ export default function QuickPaneApp() { handleDismiss, ]) + // ───────────────────────────────────────────────────────────────────────── + // AI Processing Handler + // ───────────────────────────────────────────────────────────────────────── + + const handleProcessWithAI = React.useCallback(async () => { + const trimmedTitle = title.trim() + if (!trimmedTitle || isProcessingAI) return + + setIsProcessingAI(true) + + try { + // Build name/ID pairs for context + const projectPairs = projects.map(p => ({ id: p.id, name: p.title })) + const areaPairs = areas.map(a => ({ id: a.id, name: a.title })) + + const result = await commands.processQuickEntryText( + trimmedTitle, + projectPairs, + areaPairs + ) + + if (result.status === 'error') { + logger.warn('AI processing failed', { error: result.error }) + setIsProcessingAI(false) + return + } + + const parsed = result.data + + // Populate form fields from AI result + setTitle(parsed.title) + + if (parsed.body) { + setBody(parsed.body) + setShowBody(true) + } + + // Map status string to TaskStatus + const validStatuses: TaskStatus[] = [ + 'inbox', + 'icebox', + 'ready', + 'in-progress', + 'blocked', + 'dropped', + 'done', + ] + if (validStatuses.includes(parsed.status as TaskStatus)) { + setStatus(parsed.status as TaskStatus) + } + + if (parsed.due) setDue(parsed.due) + if (parsed.scheduled) setScheduled(parsed.scheduled) + if (parsed.deferUntil) setDeferUntil(parsed.deferUntil) + if (parsed.projectId) setProjectId(parsed.projectId) + if (parsed.areaId) setAreaId(parsed.areaId) + + logger.info('AI processing complete') + } catch (error) { + logger.error('Unexpected error during AI processing', { error }) + } + + setIsProcessingAI(false) + }, [title, projects, areas, isProcessingAI]) + // ───────────────────────────────────────────────────────────────────────── // Theme Sync // ───────────────────────────────────────────────────────────────────────── @@ -265,10 +333,11 @@ export default function QuickPaneApp() { // Reset form on focus (fresh start) resetForm() - // Load areas and projects - const [areasResult, projectsResult] = await Promise.all([ + // Load areas, projects, and check AI availability + const [areasResult, projectsResult, aiResult] = await Promise.all([ commands.listAreas(), commands.listProjects(), + commands.checkAppleIntelligenceAvailable(), ]) if (areasResult.status === 'ok') { @@ -278,6 +347,8 @@ export default function QuickPaneApp() { setProjects(projectsResult.data) } + setAiAvailable(aiResult) + // Focus title input setTimeout(() => titleRef.current?.focus(), FOCUS_DELAY_MS) } else { @@ -339,6 +410,7 @@ export default function QuickPaneApp() { setOpenPopover(popover) }, onClosePopover: () => setOpenPopover(null), + onProcessWithAI: aiAvailable ? handleProcessWithAI : undefined, captureCurrentFocus, openPopover, showBody, @@ -373,6 +445,9 @@ export default function QuickPaneApp() { onChange={setTitle} onKeyDown={handleTitleKeyDown} inputRef={titleRef} + aiAvailable={aiAvailable} + aiProcessing={isProcessingAI} + onProcessWithAI={handleProcessWithAI} /> void onKeyDown?: (e: React.KeyboardEvent) => void inputRef?: React.RefObject + aiAvailable?: boolean + aiProcessing?: boolean + onProcessWithAI?: () => void } /** - * QuickPaneTitle - Title input row with visual checkbox. + * QuickPaneTitle - Title input row with visual checkbox and optional AI button. * * Features: * - Visual-only checkbox (always unchecked, non-interactive) * - Auto-resizing textarea that grows with content * - Prevents Enter from creating newlines (handled by parent) + * - AI processing button (visible only when Apple Intelligence is available) */ export function QuickPaneTitle({ value, onChange, onKeyDown, inputRef, + aiAvailable, + aiProcessing, + onProcessWithAI, }: QuickPaneTitleProps) { const handleChange = (e: React.ChangeEvent) => { onChange(e.target.value) @@ -29,6 +36,8 @@ export function QuickPaneTitle({ e.target.style.height = `${e.target.scrollHeight}px` } + const showAIButton = aiAvailable && value.trim().length > 0 + return (
{/* Visual checkbox - vertically centered with first line of text-xl textarea */} @@ -47,6 +56,60 @@ export function QuickPaneTitle({ autoCapitalize="off" spellCheck={false} /> + + {showAIButton && ( + + )}
) } + +/** Sparkles icon for the AI button */ +function SparklesIcon({ className }: { className?: string }) { + return ( + + + + + + ) +} + +/** Simple spinner icon for loading state */ +function SpinnerIcon({ className }: { className?: string }) { + return ( + + + + ) +} diff --git a/tdn-desktop/src/components/quick-pane/useQuickPaneKeyboard.ts b/tdn-desktop/src/components/quick-pane/useQuickPaneKeyboard.ts index 72e2fb50..f22c92a6 100644 --- a/tdn-desktop/src/components/quick-pane/useQuickPaneKeyboard.ts +++ b/tdn-desktop/src/components/quick-pane/useQuickPaneKeyboard.ts @@ -16,6 +16,8 @@ interface UseQuickPaneKeyboardOptions { onOpenPopover: (popover: PopoverType) => void /** Called when Escape pressed with a popover open */ onClosePopover: () => void + /** Called when Cmd+Shift+A pressed (only if AI is available) */ + onProcessWithAI?: () => void /** Called before opening a popover to capture current focus */ captureCurrentFocus: () => void /** Current open popover (null if none) */ @@ -29,6 +31,7 @@ interface UseQuickPaneKeyboardOptions { openDue: ParsedShortcut openDefer: ParsedShortcut openStatus: ParsedShortcut + processWithAI: ParsedShortcut } } @@ -43,6 +46,7 @@ export function useQuickPaneKeyboard({ onSetScheduledToday, onOpenPopover, onClosePopover, + onProcessWithAI, captureCurrentFocus, openPopover, showBody, @@ -102,6 +106,13 @@ export function useQuickPaneKeyboard({ return } + // Cmd+Shift+A - process with AI (only when available) + if (onProcessWithAI && matchesKeyboardEvent(shortcuts.processWithAI, e)) { + e.preventDefault() + onProcessWithAI() + return + } + // Cmd+Shift+Enter - toggle body if (e.key === 'Enter' && e.metaKey && e.shiftKey) { e.preventDefault() @@ -127,6 +138,7 @@ export function useQuickPaneKeyboard({ onSetScheduledToday, onOpenPopover, onClosePopover, + onProcessWithAI, captureCurrentFocus, openPopover, showBody, diff --git a/tdn-desktop/src/lib/bindings.ts b/tdn-desktop/src/lib/bindings.ts index 7a8ec139..31da455a 100644 --- a/tdn-desktop/src/lib/bindings.ts +++ b/tdn-desktop/src/lib/bindings.ts @@ -353,6 +353,26 @@ async getEntityRawContent(entityType: string, id: string) : Promise { + return await TAURI_INVOKE("check_apple_intelligence_available"); +}, +/** + * Process free-form text input using Apple Intelligence to extract structured task fields. + * + * Takes the raw text from the quick entry title field, plus lists of available + * projects and areas for context, and returns a parsed result with all fields populated. + */ +async processQuickEntryText(text: string, projects: NameIdPair[], areas: NameIdPair[]) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("process_quick_entry_text", { text, projects, areas }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} } } @@ -483,6 +503,22 @@ export type CreateTaskOptions = { title: string | null; status: TaskStatus | nul */ export type DummyVaultPaths = { tasksDir: string; areasDir: string; projectsDir: string } export type JsonValue = null | boolean | number | string | JsonValue[] | Partial<{ [key in string]: JsonValue }> +/** + * A name+ID pair for passing project/area context to the AI processor. + */ +export type NameIdPair = { id: string; name: string } +/** + * Result of AI-processing free-form text into structured task fields. + */ +export type ParsedQuickEntry = { title: string; body: string; status: string; due: string | null; scheduled: string | null; deferUntil: string | null; +/** + * Matched project ID (if a project name was recognised) + */ +projectId: string | null; +/** + * Matched area ID (if an area name was recognised) + */ +areaId: string | null } /** * Public project struct exposed to TypeScript via tauri-specta. */ From 8f0085f26eb7480906e7c867ff53aef2a7438d0a Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 23 Mar 2026 02:54:58 +0000 Subject: [PATCH 04/32] Fix wikilinks using hash IDs instead of entity titles The vault writer expects project/area names for wikilinks (e.g. "[[Acme Corp]]") but the frontend passes hash IDs from the entity index. This caused files on disk to contain wikilinks like "[[ea7d3b50f1c597cb]]" instead of "[[Acme Corp]]". Resolve hash IDs to entity titles in all four write paths: create_task, create_project, update_task, update_project. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/vault/manager.rs | 68 ++++++++++++++++++++-- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/tdn-desktop/src-tauri/src/vault/manager.rs b/tdn-desktop/src-tauri/src/vault/manager.rs index 8e9cda37..b2ceaf18 100644 --- a/tdn-desktop/src-tauri/src/vault/manager.rs +++ b/tdn-desktop/src-tauri/src/vault/manager.rs @@ -426,13 +426,29 @@ impl VaultManager { options.title.as_deref().unwrap_or("(untitled)") ); - let tasks_dir = { + let (tasks_dir, options) = { let inner = self.inner.read(); - inner + let tasks_dir = inner .config .as_ref() .map(|c| c.tasks_dir.clone()) - .ok_or_else(|| VaultError::not_configured("Vault not initialized"))? + .ok_or_else(|| VaultError::not_configured("Vault not initialized"))?; + + // Resolve project/area IDs to titles for wikilinks. + // The writer expects titles (e.g. "Q1 Planning"), not hash IDs. + let mut options = options; + if let Some(ref id) = options.project_id { + if let Some(project) = inner.index.get_project(id) { + options.project_id = Some(project.title.clone()); + } + } + if let Some(ref id) = options.area_id { + if let Some(area) = inner.index.get_area(id) { + options.area_id = Some(area.title.clone()); + } + } + + (tasks_dir, options) }; // Use RAII guard to ensure write flag is always reset, even on panic @@ -454,13 +470,23 @@ impl VaultManager { self.ensure_configured()?; debug!("Creating project: {}", options.title); - let projects_dir = { + let (projects_dir, options) = { let inner = self.inner.read(); - inner + let projects_dir = inner .config .as_ref() .map(|c| c.projects_dir.clone()) - .ok_or_else(|| VaultError::not_configured("Vault not initialized"))? + .ok_or_else(|| VaultError::not_configured("Vault not initialized"))?; + + // Resolve area ID to title for wikilinks + let mut options = options; + if let Some(ref id) = options.area_id { + if let Some(area) = inner.index.get_area(id) { + options.area_id = Some(area.title.clone()); + } + } + + (projects_dir, options) }; let _guard = WriteFlagGuard::new(self); @@ -482,6 +508,25 @@ impl VaultManager { let task = self.get_task(&update.id)?; + // Resolve project/area IDs to titles for wikilinks + let mut update = update; + if let Some(ref value) = update.project { + if !value.is_empty() { + let inner = self.inner.read(); + if let Some(project) = inner.index.get_project(value) { + update.project = Some(project.title.clone()); + } + } + } + if let Some(ref value) = update.area { + if !value.is_empty() { + let inner = self.inner.read(); + if let Some(area) = inner.index.get_area(value) { + update.area = Some(area.title.clone()); + } + } + } + let _guard = WriteFlagGuard::new(self); let updated_task = crate::vault::update_task(&task, update.clone())?; @@ -501,6 +546,17 @@ impl VaultManager { let project = self.get_project(&update.id)?; + // Resolve area ID to title for wikilinks + let mut update = update; + if let Some(ref value) = update.area { + if !value.is_empty() { + let inner = self.inner.read(); + if let Some(area) = inner.index.get_area(value) { + update.area = Some(area.title.clone()); + } + } + } + let _guard = WriteFlagGuard::new(self); let updated_project = crate::vault::update_project(&project, update.clone())?; From d4828be0e07b7913e958842a5611fd214c1231e0 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Mon, 23 Mar 2026 03:03:45 +0000 Subject: [PATCH 05/32] Tigheten Apple Intelligence prompts --- tdn-desktop/src-tauri/src/commands/ai.rs | 37 ++++++++++++++++--- .../src-tauri/swift/apple_intelligence.swift | 12 +++--- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 7fe04b04..b792c4bd 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -105,10 +105,14 @@ fn build_system_prompt(projects: &[NameIdPair], areas: &[NameIdPair]) -> String \n\ Rules:\n\ - Create a concise, actionable title (not the raw input verbatim)\n\ - - Match project/area names exactly from the lists above, or return empty string\n\ - - Convert any relative dates to YYYY-MM-DD format based on today's date\n\ + - ONLY set a date field if the input EXPLICITLY mentions a date or deadline. \ + Do NOT invent or guess dates. Leave as empty string if no date is mentioned.\n\ + - ONLY set project/area if the input EXPLICITLY names one from the lists above. \ + Do NOT guess or infer. Leave as empty string if not mentioned.\n\ + - If a date is mentioned, convert it to YYYY-MM-DD format based on today's date\n\ - Default status to inbox unless clearly stated otherwise\n\ - - Put any detail beyond the title into the body field" + - Body should be empty string unless the input contains meaningful detail beyond the title. \ + Do NOT repeat the title in the body. Do NOT add information that was not in the input." ) } @@ -133,10 +137,18 @@ fn parse_ai_response( // Determine body: include original text unless title is identical to input let body = if title.eq_ignore_ascii_case(original_text.trim()) { // Title is the same as input — no need to duplicate in body - body_from_ai + // But only use AI body if it adds new information + if is_essentially_same(&body_from_ai, original_text.trim()) { + String::new() + } else { + body_from_ai + } } else { // Title was transformed — preserve original text in body - if body_from_ai.is_empty() { + // Don't append AI body if it's just parroting the input + if body_from_ai.is_empty() + || is_essentially_same(&body_from_ai, original_text.trim()) + { original_text.trim().to_string() } else { format!("{}\n\n{}", original_text.trim(), body_from_ai) @@ -210,6 +222,21 @@ fn non_empty_date(s: Option<&str>) -> Option { } } +/// Check if two strings are essentially the same (ignoring case, trailing punctuation, whitespace). +/// Used to avoid duplicating content when the AI parrots back the input. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn is_essentially_same(a: &str, b: &str) -> bool { + if a.is_empty() || b.is_empty() { + return a.is_empty() && b.is_empty(); + } + let normalize = |s: &str| { + s.trim() + .trim_end_matches(|c: char| c == '.' || c == '!' || c == '?') + .to_lowercase() + }; + normalize(a) == normalize(b) +} + /// Case-insensitive exact match of a name to an ID from a list of name/ID pairs. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 0e68bf4a..5ae1ee4b 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -10,24 +10,24 @@ private struct ParsedTask: Sendable { @Guide(description: "A concise task title summarizing the request") let title: String - @Guide(description: "Additional context or notes, empty string if none") + @Guide(description: "Empty string unless the input contains extra detail beyond the title") let body: String let status: ParsedStatus - @Guide(description: "Due date in YYYY-MM-DD format, empty string if none") + @Guide(description: "ONLY if a due date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no due date mentioned.") let due: String - @Guide(description: "Scheduled date in YYYY-MM-DD format, empty string if none") + @Guide(description: "ONLY if a scheduled date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no scheduled date mentioned.") let scheduled: String - @Guide(description: "Defer-until date in YYYY-MM-DD format, empty string if none") + @Guide(description: "ONLY if a defer-until date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no defer date mentioned.") let deferUntil: String - @Guide(description: "Exact project name from the available list, empty string if none") + @Guide(description: "ONLY if the input explicitly mentions a project from the available list. Empty string if no project mentioned.") let project: String - @Guide(description: "Exact area name from the available list, empty string if none") + @Guide(description: "ONLY if the input explicitly mentions an area from the available list. Empty string if no area mentioned.") let area: String } From 5a83b98b1e14a282a8fc5a407cbce10130e88ee6 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Wed, 25 Mar 2026 21:34:41 +0000 Subject: [PATCH 06/32] Update AI powered stuff. --- tdn-desktop/src-tauri/src/commands/ai.rs | 117 +++++++++------ .../src-tauri/src/commands/ai_prompts.rs | 140 ++++++++++++++++++ tdn-desktop/src-tauri/src/commands/mod.rs | 1 + .../src-tauri/swift/apple_intelligence.swift | 14 +- .../components/quick-pane/QuickPaneApp.tsx | 12 +- tdn-desktop/src/lib/bindings.ts | 12 +- 6 files changed, 236 insertions(+), 60 deletions(-) create mode 100644 tdn-desktop/src-tauri/src/commands/ai_prompts.rs diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index b792c4bd..1dace847 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -30,6 +30,16 @@ pub struct NameIdPair { pub name: String, } +/// A project with its area relationship for richer AI context. +#[derive(Debug, Clone, Deserialize, Type)] +#[serde(rename_all = "camelCase")] +pub struct ProjectContext { + pub id: String, + pub name: String, + /// The area name this project belongs to (if any) + pub area_name: Option, +} + /// Check if Apple Intelligence is available on this device. #[tauri::command] #[specta::specta] @@ -47,12 +57,12 @@ pub fn check_apple_intelligence_available() -> bool { /// Process free-form text input using Apple Intelligence to extract structured task fields. /// /// Takes the raw text from the quick entry title field, plus lists of available -/// projects and areas for context, and returns a parsed result with all fields populated. +/// projects (with area relationships) and areas for context. #[tauri::command] #[specta::specta] pub fn process_quick_entry_text( text: String, - projects: Vec, + projects: Vec, areas: Vec, ) -> Result { let trimmed = text.trim(); @@ -62,9 +72,50 @@ pub fn process_quick_entry_text( #[cfg(all(target_os = "macos", target_arch = "aarch64"))] { - let system_prompt = build_system_prompt(&projects, &areas); + let today = chrono::Local::now(); + let date_str = today.format("%Y-%m-%d").to_string(); + let day_of_week = today.format("%A").to_string(); + + let projects_with_areas: Vec = projects + .iter() + .map(|p| super::ai_prompts::ProjectWithArea { + name: p.name.clone(), + area_name: p.area_name.clone(), + }) + .collect(); + + let system_prompt = + super::ai_prompts::build_system_prompt(&projects_with_areas, &areas, &date_str, &day_of_week); + + log::info!("── AI Quick Entry ──────────────────────────────────"); + log::info!("Input: {trimmed:?}"); + log::debug!("System prompt:\n{system_prompt}"); + let response = crate::apple_intelligence::process_text(&system_prompt, trimmed, 0)?; - parse_ai_response(&response, trimmed, &projects, &areas) + + log::info!("Raw response: {response}"); + + let result = parse_ai_response(&response, trimmed, &projects, &areas)?; + + log::info!("Mapped result:"); + log::info!(" title: {:?}", result.title); + log::info!( + " body: {:?}", + if result.body.is_empty() { + "(empty)" + } else { + &result.body + } + ); + log::info!(" status: {:?}", result.status); + log::info!(" due: {:?}", result.due); + log::info!(" scheduled: {:?}", result.scheduled); + log::info!(" defer: {:?}", result.defer_until); + log::info!(" project: {:?}", result.project_id); + log::info!(" area: {:?}", result.area_id); + log::info!("────────────────────────────────────────────────────"); + + Ok(result) } #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))] @@ -74,54 +125,12 @@ pub fn process_quick_entry_text( } } -/// Build the system prompt with today's date and available projects/areas. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] -fn build_system_prompt(projects: &[NameIdPair], areas: &[NameIdPair]) -> String { - let today = chrono::Local::now(); - let date_str = today.format("%Y-%m-%d").to_string(); - let day_of_week = today.format("%A").to_string(); - - let project_names: Vec<&str> = projects.iter().map(|p| p.name.as_str()).collect(); - let area_names: Vec<&str> = areas.iter().map(|a| a.name.as_str()).collect(); - - let projects_list = if project_names.is_empty() { - "(none)".to_string() - } else { - project_names.join(", ") - }; - - let areas_list = if area_names.is_empty() { - "(none)".to_string() - } else { - area_names.join(", ") - }; - - format!( - "You are a task parser. Extract structured task fields from free-form input.\n\ - Today is {date_str} ({day_of_week}).\n\ - \n\ - Available projects: {projects_list}\n\ - Available areas: {areas_list}\n\ - \n\ - Rules:\n\ - - Create a concise, actionable title (not the raw input verbatim)\n\ - - ONLY set a date field if the input EXPLICITLY mentions a date or deadline. \ - Do NOT invent or guess dates. Leave as empty string if no date is mentioned.\n\ - - ONLY set project/area if the input EXPLICITLY names one from the lists above. \ - Do NOT guess or infer. Leave as empty string if not mentioned.\n\ - - If a date is mentioned, convert it to YYYY-MM-DD format based on today's date\n\ - - Default status to inbox unless clearly stated otherwise\n\ - - Body should be empty string unless the input contains meaningful detail beyond the title. \ - Do NOT repeat the title in the body. Do NOT add information that was not in the input." - ) -} - /// Parse the AI response JSON into a `ParsedQuickEntry`, resolving project/area names to IDs. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn parse_ai_response( response: &str, original_text: &str, - projects: &[NameIdPair], + projects: &[ProjectContext], areas: &[NameIdPair], ) -> Result { // Try to parse as JSON (structured output from @Generable) @@ -173,7 +182,7 @@ fn parse_ai_response( // Match project name to ID (case-insensitive exact match) let project_name = parsed["project"].as_str().unwrap_or("").trim(); - let project_id = match_name_to_id(project_name, projects); + let project_id = match_project_name_to_id(project_name, projects); // Match area name to ID (case-insensitive exact match) let area_name = parsed["area"].as_str().unwrap_or("").trim(); @@ -237,6 +246,18 @@ fn is_essentially_same(a: &str, b: &str) -> bool { normalize(a) == normalize(b) } +/// Case-insensitive exact match of a project name to its ID. +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn match_project_name_to_id(name: &str, projects: &[ProjectContext]) -> Option { + if name.is_empty() { + return None; + } + projects + .iter() + .find(|p| p.name.eq_ignore_ascii_case(name)) + .map(|p| p.id.clone()) +} + /// Case-insensitive exact match of a name to an ID from a list of name/ID pairs. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs new file mode 100644 index 00000000..b34ee383 --- /dev/null +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -0,0 +1,140 @@ +//! AI prompt templates for Apple Intelligence quick entry processing. +//! +//! All prompt text is centralized here for easy iteration. +//! The system prompt guides the ~3B on-device model through step-by-step +//! decision-making for each task field. + +use super::ai::NameIdPair; + +/// A project with its area relationship, for richer context in the prompt. +pub struct ProjectWithArea { + pub name: String, + pub area_name: Option, +} + +/// Build the complete system prompt for quick entry processing. +pub fn build_system_prompt( + projects_with_areas: &[ProjectWithArea], + areas: &[NameIdPair], + today: &str, + day_of_week: &str, +) -> String { + let context_block = build_context_block(projects_with_areas, areas); + + format!( + "{ROLE_AND_CONTEXT}\n\ + \n\ + Today is {today} ({day_of_week}).\n\ + \n\ + {context_block}\n\ + \n\ + {STEP_BY_STEP_INSTRUCTIONS}" + ) +} + +const ROLE_AND_CONTEXT: &str = "\ +You are a task parser. You take free-form text (often dictated speech) and extract \ +structured task fields. You MUST only extract information that is ACTUALLY PRESENT \ +in the input. Do NOT invent, guess, or infer information that isn't there."; + +const STEP_BY_STEP_INSTRUCTIONS: &str = "\ +Follow these steps IN ORDER to decide each field. For every field, if the input does \ +not clearly indicate a value, you MUST return an empty string. + +STEP 1 — Title: +Create a concise, actionable task title from the input. Keep it short. \ +Do not just copy the input verbatim — clean it up and make it scannable. \ +Examples: 'I need to call the dentist about that appointment' → 'Call dentist about appointment' + +STEP 2 — Body: +If the input contains meaningful detail BEYOND what the title captures, put it here. \ +Otherwise return empty string. NEVER repeat or paraphrase the title. NEVER add \ +information that was not in the original input. + +STEP 3 — Project and Area: +ONLY set these if the input EXPLICITLY names or clearly references a specific project \ +or area from the available lists. 'upgrading the database' does NOT imply any project \ +unless the input says which project. 'Buy groceries' does NOT imply any area. \ +If unsure, return empty string. It is MUCH better to leave these empty than to guess wrong. + +STEP 4 — Status: +Default to 'inbox' unless the input gives a clear signal: +- 'blocked' → input says something is blocked or waiting on someone +- 'ready' → input implies immediate action ('today', 'this afternoon', 'right now', 'need to') +- 'icebox' → input suggests maybe/someday ('might', 'eventually', 'one day', 'consider') +- 'inProgress' → input says already started or underway +If ambiguous, use 'inbox'. Most tasks should be 'inbox'. + +STEP 5 — Due date: +ONLY set if the input EXPLICITLY mentions a deadline or due date. \ +Look for words like: 'due', 'deadline', 'by [date]', 'must be done by', 'no later than'. \ +'Buy groceries for the week' has NO due date. 'Submit report by Friday' has a due date. \ +If no deadline language is present, return empty string. + +STEP 6 — Scheduled date: +ONLY set if the input implies WHEN to do the task. \ +Look for: 'today', 'tomorrow', 'on Monday', 'this Friday', 'next week', 'in two weeks', \ +a specific date reference. \ +'Buy groceries for the week' could mean today but is NOT certain — return empty string. \ +'Call the dentist tomorrow' → set to tomorrow's date. \ +The further away the implied date, the less likely it's a scheduled date (unless the \ +input explicitly says 'schedule for'). +If no timing language is present, return empty string. + +STEP 7 — Defer-until date: +ONLY set if the input EXPLICITLY mentions deferring or delaying. \ +Look for: 'defer', 'not until', 'starting from', 'becomes available', 'actionable on', \ +'don't start until', 'after [date]'. \ +This is rare. Most tasks will NOT have a defer date. Return empty string unless very clear. + +CRITICAL REMINDERS: +- Empty string is ALWAYS the safe default for optional fields. +- Guessing wrong is WORSE than leaving a field empty. +- The user will review your output and can easily add missing fields. +- The user CANNOT easily know which fields you invented vs extracted. +- When in doubt: empty string."; + +/// Build the structured context block showing areas and their projects. +fn build_context_block( + projects_with_areas: &[ProjectWithArea], + areas: &[NameIdPair], +) -> String { + // Group projects by area + let mut area_projects: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut unassigned_projects: Vec = Vec::new(); + + for project in projects_with_areas { + if let Some(area_name) = &project.area_name { + area_projects + .entry(area_name.clone()) + .or_default() + .push(project.name.clone()); + } else { + unassigned_projects.push(project.name.clone()); + } + } + + let mut lines = vec!["Available areas and their projects:".to_string()]; + + for area in areas { + let projects = area_projects.get(&area.name); + match projects { + Some(p) if !p.is_empty() => { + lines.push(format!("- {} (area): {}", area.name, p.join(", "))); + } + _ => { + lines.push(format!("- {} (area): (no projects)", area.name)); + } + } + } + + if !unassigned_projects.is_empty() { + lines.push(format!( + "- (no area): {}", + unassigned_projects.join(", ") + )); + } + + lines.join("\n") +} diff --git a/tdn-desktop/src-tauri/src/commands/mod.rs b/tdn-desktop/src-tauri/src/commands/mod.rs index 02bcb1f3..94e34c56 100644 --- a/tdn-desktop/src-tauri/src/commands/mod.rs +++ b/tdn-desktop/src-tauri/src/commands/mod.rs @@ -4,6 +4,7 @@ //! Import specific commands via their submodule (e.g., `commands::preferences::greet`). pub mod ai; +pub mod ai_prompts; pub mod config; pub mod notifications; pub mod preferences; diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 5ae1ee4b..503a4ec5 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -7,27 +7,27 @@ import FoundationModels @available(macOS 26.0, *) @Generable private struct ParsedTask: Sendable { - @Guide(description: "A concise task title summarizing the request") + @Guide(description: "Concise task title") let title: String - @Guide(description: "Empty string unless the input contains extra detail beyond the title") + @Guide(description: "Extra detail, or empty string") let body: String let status: ParsedStatus - @Guide(description: "ONLY if a due date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no due date mentioned.") + @Guide(description: "YYYY-MM-DD or empty string") let due: String - @Guide(description: "ONLY if a scheduled date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no scheduled date mentioned.") + @Guide(description: "YYYY-MM-DD or empty string") let scheduled: String - @Guide(description: "ONLY if a defer-until date is explicitly mentioned in the input. YYYY-MM-DD format. Empty string if no defer date mentioned.") + @Guide(description: "YYYY-MM-DD or empty string") let deferUntil: String - @Guide(description: "ONLY if the input explicitly mentions a project from the available list. Empty string if no project mentioned.") + @Guide(description: "Project name or empty string") let project: String - @Guide(description: "ONLY if the input explicitly mentions an area from the available list. Empty string if no area mentioned.") + @Guide(description: "Area name or empty string") let area: String } diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 69f8dce4..0384c936 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -248,13 +248,19 @@ export default function QuickPaneApp() { setIsProcessingAI(true) try { - // Build name/ID pairs for context - const projectPairs = projects.map(p => ({ id: p.id, name: p.title })) + // Build context with project→area relationships + const stripWikilink = (s: string) => + s.startsWith('[[') && s.endsWith(']]') ? s.slice(2, -2) : s + const projectContexts = projects.map(p => ({ + id: p.id, + name: p.title, + areaName: p.area ? stripWikilink(p.area) : null, + })) const areaPairs = areas.map(a => ({ id: a.id, name: a.title })) const result = await commands.processQuickEntryText( trimmedTitle, - projectPairs, + projectContexts, areaPairs ) diff --git a/tdn-desktop/src/lib/bindings.ts b/tdn-desktop/src/lib/bindings.ts index 31da455a..dbdf210c 100644 --- a/tdn-desktop/src/lib/bindings.ts +++ b/tdn-desktop/src/lib/bindings.ts @@ -364,9 +364,9 @@ async checkAppleIntelligenceAvailable() : Promise { * Process free-form text input using Apple Intelligence to extract structured task fields. * * Takes the raw text from the quick entry title field, plus lists of available - * projects and areas for context, and returns a parsed result with all fields populated. + * projects (with area relationships) and areas for context. */ -async processQuickEntryText(text: string, projects: NameIdPair[], areas: NameIdPair[]) : Promise> { +async processQuickEntryText(text: string, projects: ProjectContext[], areas: NameIdPair[]) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("process_quick_entry_text", { text, projects, areas }) }; } catch (e) { @@ -563,6 +563,14 @@ blockedBy: string[] | null; * Markdown body content (after frontmatter) */ body: string } +/** + * A project with its area relationship for richer AI context. + */ +export type ProjectContext = { id: string; name: string; +/** + * The area name this project belongs to (if any) + */ +areaName: string | null } /** * Project status enum matching S1 spec Section 4.5 */ From ef4c24afa9e4c95af56fff230f2e1e2bdc43366c Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Wed, 25 Mar 2026 22:16:08 +0000 Subject: [PATCH 07/32] WIP --- tdn-desktop/src-tauri/src/commands/ai.rs | 23 ++- .../src-tauri/src/commands/ai_prompts.rs | 144 +++++++++--------- .../src-tauri/swift/apple_intelligence.swift | 2 +- 3 files changed, 98 insertions(+), 71 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 1dace847..dc13c31f 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -125,6 +125,23 @@ pub fn process_quick_entry_text( } } +/// Strip markdown code fences from a response (e.g. ```json\n{...}\n```) +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +fn strip_code_fences(s: &str) -> &str { + let trimmed = s.trim(); + if let Some(rest) = trimmed.strip_prefix("```") { + // Skip the language tag (e.g. "json") on the first line + let after_tag = rest.find('\n').map(|i| &rest[i + 1..]).unwrap_or(rest); + // Strip trailing fence + after_tag + .strip_suffix("```") + .unwrap_or(after_tag) + .trim() + } else { + trimmed + } +} + /// Parse the AI response JSON into a `ParsedQuickEntry`, resolving project/area names to IDs. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn parse_ai_response( @@ -133,8 +150,10 @@ fn parse_ai_response( projects: &[ProjectContext], areas: &[NameIdPair], ) -> Result { - // Try to parse as JSON (structured output from @Generable) - if let Ok(parsed) = serde_json::from_str::(response) { + // Try to parse as JSON (structured output from @Generable). + // Also handles fallback where model returns JSON wrapped in markdown code fences. + let clean_response = strip_code_fences(response); + if let Ok(parsed) = serde_json::from_str::(clean_response) { let title = parsed["title"] .as_str() .unwrap_or(original_text) diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index b34ee383..a73826fd 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -1,8 +1,7 @@ //! AI prompt templates for Apple Intelligence quick entry processing. //! //! All prompt text is centralized here for easy iteration. -//! The system prompt guides the ~3B on-device model through step-by-step -//! decision-making for each task field. +//! Edit this file to refine how the on-device model parses task input. use super::ai::NameIdPair; @@ -20,79 +19,88 @@ pub fn build_system_prompt( day_of_week: &str, ) -> String { let context_block = build_context_block(projects_with_areas, areas); + let examples_block = build_examples_block(today); format!( - "{ROLE_AND_CONTEXT}\n\ + "{ROLE}\n\ \n\ Today is {today} ({day_of_week}).\n\ \n\ {context_block}\n\ \n\ - {STEP_BY_STEP_INSTRUCTIONS}" + {FIELD_INSTRUCTIONS}\n\ + \n\ + {examples_block}" ) } -const ROLE_AND_CONTEXT: &str = "\ -You are a task parser. You take free-form text (often dictated speech) and extract \ -structured task fields. You MUST only extract information that is ACTUALLY PRESENT \ -in the input. Do NOT invent, guess, or infer information that isn't there."; - -const STEP_BY_STEP_INSTRUCTIONS: &str = "\ -Follow these steps IN ORDER to decide each field. For every field, if the input does \ -not clearly indicate a value, you MUST return an empty string. - -STEP 1 — Title: -Create a concise, actionable task title from the input. Keep it short. \ -Do not just copy the input verbatim — clean it up and make it scannable. \ -Examples: 'I need to call the dentist about that appointment' → 'Call dentist about appointment' - -STEP 2 — Body: -If the input contains meaningful detail BEYOND what the title captures, put it here. \ -Otherwise return empty string. NEVER repeat or paraphrase the title. NEVER add \ -information that was not in the original input. - -STEP 3 — Project and Area: -ONLY set these if the input EXPLICITLY names or clearly references a specific project \ -or area from the available lists. 'upgrading the database' does NOT imply any project \ -unless the input says which project. 'Buy groceries' does NOT imply any area. \ -If unsure, return empty string. It is MUCH better to leave these empty than to guess wrong. - -STEP 4 — Status: -Default to 'inbox' unless the input gives a clear signal: -- 'blocked' → input says something is blocked or waiting on someone -- 'ready' → input implies immediate action ('today', 'this afternoon', 'right now', 'need to') -- 'icebox' → input suggests maybe/someday ('might', 'eventually', 'one day', 'consider') -- 'inProgress' → input says already started or underway -If ambiguous, use 'inbox'. Most tasks should be 'inbox'. - -STEP 5 — Due date: -ONLY set if the input EXPLICITLY mentions a deadline or due date. \ -Look for words like: 'due', 'deadline', 'by [date]', 'must be done by', 'no later than'. \ -'Buy groceries for the week' has NO due date. 'Submit report by Friday' has a due date. \ -If no deadline language is present, return empty string. - -STEP 6 — Scheduled date: -ONLY set if the input implies WHEN to do the task. \ -Look for: 'today', 'tomorrow', 'on Monday', 'this Friday', 'next week', 'in two weeks', \ -a specific date reference. \ -'Buy groceries for the week' could mean today but is NOT certain — return empty string. \ -'Call the dentist tomorrow' → set to tomorrow's date. \ -The further away the implied date, the less likely it's a scheduled date (unless the \ -input explicitly says 'schedule for'). -If no timing language is present, return empty string. - -STEP 7 — Defer-until date: -ONLY set if the input EXPLICITLY mentions deferring or delaying. \ -Look for: 'defer', 'not until', 'starting from', 'becomes available', 'actionable on', \ -'don't start until', 'after [date]'. \ -This is rare. Most tasks will NOT have a defer date. Return empty string unless very clear. - -CRITICAL REMINDERS: -- Empty string is ALWAYS the safe default for optional fields. -- Guessing wrong is WORSE than leaving a field empty. -- The user will review your output and can easily add missing fields. -- The user CANNOT easily know which fields you invented vs extracted. -- When in doubt: empty string."; +// ───────────────────────────────────────────────────────────────────────────── +// Prompt constants +// ───────────────────────────────────────────────────────────────────────────── + +const ROLE: &str = "\ +You are a task field extractor. Given free-form text, populate structured task fields. \ +Return empty string for any field where the input provides no clear value. \ +Empty string is always the safe choice."; + +const FIELD_INSTRUCTIONS: &str = "\ +Field instructions: + +title: Rewrite the input as a concise, actionable task title. + +body: Include only if the input has meaningful detail beyond what the title captures. \ +Otherwise empty string. + +status: Use 'inbox' unless the input clearly indicates otherwise. \ +Use 'ready' only for explicit immediacy ('today', 'this afternoon', 'right now'). \ +Use 'blocked' only if the input says something is blocked or waiting. \ +Use 'icebox' only for explicit maybe/someday language. \ +Use 'inProgress' only if the input says work has already started. + +project: Set only if the input explicitly names a project from the list above. \ +Empty string if no project is mentioned by name. + +area: Set only if the input explicitly names an area from the list above. \ +Empty string if no area is mentioned by name. + +due: Set only if the input contains deadline language ('due by', 'deadline', \ +'must be done by', 'no later than'). YYYY-MM-DD format. Empty string otherwise. + +scheduled: Set only if the input specifies when to do the task ('tomorrow', \ +'on Monday', 'this Friday', 'schedule for next week'). YYYY-MM-DD format. \ +Empty string otherwise. Vague time references ('for the week', 'soon') are \ +NOT scheduled dates — use empty string. + +deferUntil: Set only if the input explicitly mentions deferring ('not until', \ +'defer until', 'start after'). This is rare. YYYY-MM-DD format. \ +Empty string otherwise."; + +/// Build few-shot examples. These are the highest-impact technique for small models. +fn build_examples_block(today: &str) -> String { + // Compute tomorrow for the example + let tomorrow = chrono::NaiveDate::parse_from_str(today, "%Y-%m-%d") + .ok() + .and_then(|d| d.succ_opt()) + .map(|d| d.format("%Y-%m-%d").to_string()) + .unwrap_or_else(|| "tomorrow".to_string()); + + format!( + "Examples:\n\ + \n\ + Input: \"Buy groceries for the week\"\n\ + Output: {{\"title\":\"Buy groceries\",\"body\":\"\",\"status\":\"inbox\",\ + \"due\":\"\",\"scheduled\":\"\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ + \n\ + Input: \"Call the dentist tomorrow about that crown\"\n\ + Output: {{\"title\":\"Call dentist about crown\",\"body\":\"\",\"status\":\"ready\",\ + \"due\":\"\",\"scheduled\":\"{tomorrow}\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ + \n\ + Input: \"I need to submit the Q1 tax return by April 15th, gather all the receipts first\"\n\ + Output: {{\"title\":\"Submit Q1 tax return\",\"body\":\"Gather all receipts first.\",\ + \"status\":\"inbox\",\"due\":\"2026-04-15\",\"scheduled\":\"\",\"deferUntil\":\"\",\ + \"project\":\"Q1 Tax Preparation\",\"area\":\"\"}}" + ) +} /// Build the structured context block showing areas and their projects. fn build_context_block( @@ -115,16 +123,16 @@ fn build_context_block( } } - let mut lines = vec!["Available areas and their projects:".to_string()]; + let mut lines = vec!["Areas and projects:".to_string()]; for area in areas { let projects = area_projects.get(&area.name); match projects { Some(p) if !p.is_empty() => { - lines.push(format!("- {} (area): {}", area.name, p.join(", "))); + lines.push(format!("- {}: {}", area.name, p.join(", "))); } _ => { - lines.push(format!("- {} (area): (no projects)", area.name)); + lines.push(format!("- {}", area.name)); } } } diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 503a4ec5..9ad9dfb7 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -1,7 +1,6 @@ import Dispatch import Foundation import FoundationModels - // MARK: - Generable types for structured task parsing @available(macOS 26.0, *) @@ -137,6 +136,7 @@ public func processTextWithSystemPrompt( return responsePtr } + // Use contentTagging adapter — optimized for extraction and classification tasks let model = SystemLanguageModel.default guard model.availability == .available else { responsePtr.pointee.error_message = duplicateCString( From f150bd85a67edfa38ef1bdc92c0df98022d77693 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Wed, 25 Mar 2026 22:44:06 +0000 Subject: [PATCH 08/32] Update task doc with implementation status and next phases Rewrites the task document to reflect completed work (Phases 1-3), documents learnings about Apple Intelligence ~3B model behaviour from hands-on testing and WWDC25 research, and outlines next phases: eval harness, few-shot contamination fix, deterministic date/project resolution, and polish. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 217 +++++++++--------- 1 file changed, 104 insertions(+), 113 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 5485ddf2..654941ee 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -32,156 +32,147 @@ This does not involve voice-to-text transcription — we assume users have a tra The quick entry pane is a compact floating card with: title input (top), metadata row with status/dates (middle), footer with project/area selectors + cancel/save (bottom). The AI processing button should sit adjacent to the title input area since that's where the action happens. -## Background: How Handy Does This +## Background: Handy Reference Implementation -The Handy codebase (`~/dev/handy`) has a production-grade Apple Intelligence integration for post-processing transcriptions. It provides a proven Tauri ↔ Swift bridge pattern that we should follow closely. +The Handy codebase (`~/dev/handy`) has a production-grade Apple Intelligence integration. Our Swift bridge is adapted from it. -### Architecture +Key files in Handy for reference: `src-tauri/swift/apple_intelligence.swift`, `src-tauri/swift/apple_intelligence_bridge.h`, `src-tauri/src/apple_intelligence.rs`, `src-tauri/build.rs`. -``` -React frontend - → Tauri command (Rust) - → C FFI (unsafe) - → Swift FoundationModels API - → On-device ~3B model inference - ← Structured response (@Generable) - ← Result - ← Populate form fields -``` +Critical gotchas discovered via Handy: SIGABRT if accessing `SystemLanguageModel.default` during app init (defer to runtime); async→sync bridge via `DispatchSemaphore`; weak-link FoundationModels for older macOS compatibility; LLMs insert invisible Unicode chars (strip them); `@Generable` can fail (always have plain-text fallback). -### Key Files to Reference in Handy +## Current Implementation Status -| File | What it does | -|------|-------------| -| `src-tauri/swift/apple_intelligence.swift` | Real implementation (~144 lines). `@Generable` struct, `LanguageModelSession`, structured output with plain-text fallback, `DispatchSemaphore` for async→sync bridge | -| `src-tauri/swift/apple_intelligence_stub.swift` | Stub compiled when SDK lacks FoundationModels (~46 lines) | -| `src-tauri/swift/apple_intelligence_bridge.h` | C header defining `AppleLLMResponse` struct and FFI function signatures | -| `src-tauri/src/apple_intelligence.rs` | Rust wrapper with safe abstractions over the C FFI | -| `src-tauri/build.rs` | `build_apple_intelligence_bridge()` — SDK detection via `xcrun`, `swiftc` compilation, `libtool` for static lib, weak framework linking | +### Completed (Phases 1-3) -### Critical Gotchas Discovered by Handy +**Swift bridge:** `@Generable ParsedTask` struct with `ParsedStatus` enum, `LanguageModelSession` with structured output + plain-text fallback, availability check. Build script with SDK detection, stub compilation, weak linking. All adapted from Handy. -1. **SIGABRT on init:** Cannot access `SystemLanguageModel.default` during app initialization on macOS 26 — must defer the availability check to runtime (when the user actually tries to use the feature). -2. **Async→sync bridge:** Swift `async/await` called from synchronous Rust FFI. Uses `DispatchSemaphore` + `Task.detached(priority: .userInitiated)` with a thread-safe `ResultBox`. -3. **Weak linking:** Must use `-weak_framework FoundationModels` so the app launches on older macOS. Deployment target is macOS 11.0 with `@available(macOS 26.0, *)` runtime checks. -4. **Invisible Unicode:** LLMs sometimes insert zero-width spaces (`\u{200B}`, `\u{200C}`, `\u{200D}`, `\u{FEFF}`) — strip them from output. -5. **Structured output fallback:** `@Generable` can fail — always have a plain-text fallback path. -6. **Build-time SDK detection:** Check for `FoundationModels.framework` in the SDK path. If absent, compile the stub instead. +**Rust layer:** Safe FFI wrapper (`apple_intelligence.rs`), Tauri commands (`commands/ai.rs`), centralized prompt templates (`commands/ai_prompts.rs`). System prompt with step-by-step field instructions, few-shot examples, and structured area→project context. Response parsing with date validation, project/area name→ID matching, body deduplication logic. -## Implementation Plan +**Frontend:** Sparkles button in title row (conditionally rendered when AI available + text entered), `Cmd+Shift+A` shortcut (active only when pane open), loading spinner, form field population from AI result. Feature is completely invisible when Apple Intelligence is unavailable. -### Phase 1: Swift Bridge (Apple Intelligence integration layer) +**Bug fix (pre-existing):** Fixed wikilinks using hash IDs instead of entity titles in all four write paths (create_task, create_project, update_task, update_project). -Set up the Tauri ↔ Swift FFI bridge, closely following Handy's pattern. +### Key files -**Files to create:** -- `src-tauri/swift/apple_intelligence.swift` — The `@Generable` struct for parsed tasks, inference function, availability check -- `src-tauri/swift/apple_intelligence_stub.swift` — Fallback for builds without FoundationModels SDK -- `src-tauri/swift/apple_intelligence_bridge.h` — C-compatible struct and function declarations +| File | Purpose | +|------|---------| +| `src-tauri/swift/apple_intelligence.swift` | `@Generable` struct, LLM session, FFI functions | +| `src-tauri/swift/apple_intelligence_stub.swift` | Stub for builds without FoundationModels SDK | +| `src-tauri/swift/apple_intelligence_bridge.h` | C header for Swift ↔ Rust FFI | +| `src-tauri/src/apple_intelligence.rs` | Safe Rust wrapper over C FFI | +| `src-tauri/src/commands/ai.rs` | Tauri commands, response parsing, field validation | +| `src-tauri/src/commands/ai_prompts.rs` | All prompt text centralized for iteration | +| `src/components/quick-pane/QuickPaneApp.tsx` | AI processing handler, availability state | +| `src/components/quick-pane/QuickPaneTitle.tsx` | Sparkles button, loading state | +| `src/components/quick-pane/useQuickPaneKeyboard.ts` | `Cmd+Shift+A` shortcut | -**Files to modify:** -- `src-tauri/build.rs` — Add `build_apple_intelligence_bridge()` (can adapt directly from Handy's `build.rs`) +## Learnings About Apple Intelligence (~3B Model) -**Key design detail — the `@Generable` struct:** +These findings are from hands-on testing and WWDC25 research. They should inform all future prompt work. -```swift -@Generable -struct ParsedTask: Sendable { - @Guide(description: "A concise task title summarizing the request") - let title: String +### What works well - @Guide(description: "Additional context or notes, empty string if none") - let body: String +- **`@Generable` with `@Guide(description:)` for structured output.** The model reliably produces valid JSON matching the struct. `ParsedStatus` enum gives constrained decoding for free. +- **Few-shot examples are the single highest-impact technique.** Adding 2-3 input→output examples dramatically improved field accuracy vs. instructions alone. +- **"Empty string is the safe default" framing works.** Combined with few-shot examples showing empty fields, the model stopped hallucinating dates for simple inputs. +- **Project/area name validation in Rust catches hallucinations.** The model sometimes invents project/area names that don't exist; case-insensitive exact matching in Rust silently drops them. +- **Title generation is reliable.** The model consistently produces clean, concise titles. - let status: ParsedStatus +### What doesn't work - @Guide(description: "Due date in YYYY-MM-DD format, empty string if none") - let due: String +- **Date arithmetic is unreliable.** "This Friday" from Wednesday March 25 → model returned March 30 (Monday, wrong). "Next Monday" → April 2 (Thursday, wrong). "End of the month" → October 31 (wrong month entirely). Apple explicitly says "avoid asking the model to act as a calculator." +- **Few-shot contamination.** When an input is similar to a few-shot example, the model copies fields from the example. "Submit Q1 tax return by April 15th" copied the body "Gather all receipts first" from the similar example — the input never mentioned receipts. +- **Project name fuzzy matching.** "Japan Trip" in the input didn't match "Japan Trip 2025" in the project list. The model returned empty rather than approximate-matching. Our Rust validation uses exact match only. +- **`@Guide(Regex{...})` breaks `@Generable`.** Regex constraints on date fields caused structured output to fail entirely, falling back to plain text. The `.default` model doesn't support regex-constrained generation well. Removed in favour of description-only guides. +- **`contentTagging` adapter is wrong for this task.** It's optimized for tag generation, not instruction-following. Produced topic tags ("task management, shopping") instead of following our field instructions. +- **Body generation for complex inputs.** The model sometimes fabricates body content not present in the input. - @Guide(description: "Scheduled date in YYYY-MM-DD format, empty string if none") - let scheduled: String +### Key principles for prompt iteration - @Guide(description: "Defer-until date in YYYY-MM-DD format, empty string if none") - let deferUntil: String +1. **Positive framing outperforms negative.** "Set only if X is present" beats "Do NOT set unless X." +2. **Short instructions beat long ones.** Every token adds latency. Use `@Guide` for per-field constraints, prompt for high-level guidance. +3. **Chain-of-thought HURTS models under ~10B.** Don't ask the model to reason step-by-step. +4. **Few-shot examples need to be distinct from likely inputs** to avoid contamination. +5. **Structural constraints (enums, `@Guide(.anyOf)`) are stronger than description text** — but `.anyOf` needs compile-time values, limiting use for dynamic lists. - @Guide(description: "Exact project name from the available list, empty string if none") - let project: String +## Next Steps - @Guide(description: "Exact area name from the available list, empty string if none") - let area: String -} +### Phase 5: Evaluation Harness + +Build a development tool for rapid prompt iteration. This is NOT part of the normal test suite — it requires a live Apple Intelligence model on the device. + +**Approach:** Rust `#[ignore]` integration test that: +- Uses the real Swift FFI bridge (same code path as production) +- Has a fixed set of ~15 test cases with input text + expected field values +- Uses fixed context (hardcoded projects, areas, date) for reproducibility +- Calls `build_system_prompt` and `process_text` directly (no Tauri/frontend) +- Outputs a per-field pass/fail summary table +- Runnable via `cargo test eval_ai -- --ignored` (or a `bun run` alias) -@Generable -enum ParsedStatus { - case inbox - case icebox - case ready - case inProgress - case blocked +**Test case structure:** +```rust +EvalCase { + input: "Email James about the Japan Trip, schedule for next Monday", + expected_title_contains: "Email James", // substring match, not exact + expected_status: "inbox", + expected_project: Some("Japan Trip 2025"), + expected_area: None, + expected_scheduled: Some("2026-03-30"), // next Monday + expected_due: None, + expected_defer: None, } ``` -**Key design detail — status as enum:** Task statuses are known at compile time, so using a `@Generable enum` gives us constrained decoding for free — the model literally cannot output an invalid status. The enum omits `done` and `dropped` since those don't make sense for newly-created tasks. +Field matching should be flexible: substring for titles, exact for status/dates, optional for project/area (Some = must match, None = must be empty). This lets us measure regression when changing prompts. -**Key design detail — date handling:** The system prompt includes today's date and day of week. The LLM outputs dates directly in `YYYY-MM-DD` format. The ~3B model should handle common relative date arithmetic ("in 3 weeks", "next Tuesday", "end of April") well enough given today's date as context. If it occasionally gets a date wrong, the user corrects it during the review step — this is no worse than an empty field. Rust validates that returned date strings are valid `YYYY-MM-DD` and discards any that aren't. +### Phase 6: Fix Few-Shot Contamination -**Key design detail — project/area matching:** The `@Guide(.anyOf([...]))` constraint requires compile-time values, but project/area names are dynamic per-user. Instead: list valid names in the system prompt instructions and use `@Guide(description:)` for guidance. In Rust, validate the returned name against the actual list using case-insensitive exact match. If no match, leave the field empty for the user to set manually. +Remove or redesign the third few-shot example (Q1 tax return) to avoid body contamination. Options: +- Make the example input much more distinct from likely real inputs +- Use a fictional project/area name that doesn't appear in real data +- Remove body content from all examples (always show `"body":""`) -### Phase 2: Rust Layer (command, prompt building, response handling) +This is a quick prompt-only change in `ai_prompts.rs`, testable via the eval harness. -**Files to create:** -- `src-tauri/src/apple_intelligence.rs` — Safe Rust wrapper over the C FFI (adapt from Handy) +### Phase 7: Deterministic Date and Project/Area Resolution -**Files to modify:** -- `src-tauri/src/commands/` — New Tauri command `process_quick_entry_text` -- `src-tauri/src/lib.rs` or `mod.rs` — Register the new module and command +Split the work into what the LLM is good at (language understanding, intent classification) and what deterministic code is good at (date arithmetic, fuzzy string matching). -**The Tauri command should:** -1. Accept: raw text, list of area names+IDs, list of project names+IDs -2. Build system prompt: role description, today's date + day of week, available project/area names, formatting rules -3. Call Swift FFI with system prompt + raw text -4. Deserialize the `ParsedTask` response (JSON) -5. Validate date strings are valid `YYYY-MM-DD` (discard invalid ones) -6. Match returned project/area names to actual IDs (case-insensitive exact match; no match = empty) -7. Return a typed result struct with all resolved fields +**Date resolution:** -**System prompt template (built in Rust):** +Change the `@Generable` struct so date fields capture the *raw reference and intent* rather than computed YYYY-MM-DD dates: +```swift +@Guide(description: "Raw date/time reference for scheduling intent, or empty string") +let scheduledRef: String // e.g. "tomorrow", "next Monday", "this Friday" + +@Guide(description: "Raw date/time reference for deadline intent, or empty string") +let dueRef: String // e.g. "by April 15th", "by end of next week" ``` -You are a task parser. Extract structured task fields from free-form input. -Today is {date} ({day_of_week}). - -Available projects: {comma-separated names} -Available areas: {comma-separated names} - -Rules: -- Create a concise, actionable title (not the raw input verbatim) -- Match project/area names exactly from the lists above, or return empty -- Convert any relative dates to YYYY-MM-DD format based on today's date -- Default status to inbox unless clearly stated otherwise -- Put any detail beyond the title into the body field -``` -### Phase 3: Frontend Integration +The LLM's job becomes: (1) identify whether a date reference exists, (2) classify it as scheduled vs. due vs. defer intent, (3) extract the reference text. Crucially, the LLM still decides whether "this Friday" is a scheduling intent for *this task* vs. just contextual information about something else — that's a language understanding judgment the LLM should make. + +Rust then resolves the expression to a date deterministically. Options for date parsing in Rust: +- `chrono` with hand-written pattern matching for common expressions +- A crate like `dateparser` or `chrono-english` (evaluate coverage) +- Simple keyword-based resolution ("tomorrow" → +1 day, "next Monday" → find next Monday, "April 15th" → parse month+day) + +Start with a small set of common patterns and fall back to empty if unparseable. The eval harness will show which patterns are most needed. -**Files to modify:** -- `src/components/quick-pane/QuickPaneApp.tsx` — Add AI processing state, handler, availability check on pane open -- `src/components/quick-pane/QuickPaneTitle.tsx` — Add the AI button adjacent to the title input (conditionally rendered) -- `src/components/quick-pane/useQuickPaneKeyboard.ts` — Add `Cmd+Shift+A` shortcut +**Project/area matching:** -**Behaviour:** -1. On pane open (focus event), also call `commands.checkAppleIntelligenceAvailable()`. Store result in state. If unavailable, skip rendering button and registering shortcut. -2. When triggered (button or `Cmd+Shift+A`): grab current title text, show loading state (e.g. subtle spinner on the button, disable form briefly), call `commands.processQuickEntryText(...)`. -3. On success: populate title, body (with show-body toggled on), status, dates, project, area from the response. The body should contain the original raw text. -4. On error: leave form unchanged, optionally log the error. No toast or disruptive error UI. -5. User reviews populated fields and saves normally with `Cmd+Enter`. +Add fuzzy matching in Rust alongside the existing exact match. "Japan Trip" should match "Japan Trip 2025". Options: +- Case-insensitive substring matching (simplest) +- Levenshtein distance with a threshold +- Token overlap (split on spaces, check how many words match) -**Loading state:** Keep it minimal — a spinner or pulse animation on the AI button, lasting ~1-5 seconds. Don't disable the entire form (the user might want to cancel with Escape during processing). +Start with case-insensitive substring (covers the "Japan Trip" case) and evaluate via the harness. -### Phase 4: Testing and Polish +### Phase 8: Polish and Edge Cases -- Test with various dictation styles: short commands, long rambling input, ambiguous dates, misspelled project names, non-English input -- Test availability detection: verify feature is invisible on Intel Macs, older macOS, Apple Intelligence disabled -- Test the build on machines without the FoundationModels SDK (stub compilation) -- Test edge cases: empty input, very long input (context window), input that's already a clean title -- Consider whether re-processing should be supported (user processes, edits, processes again) +- Re-processing support (user processes, edits title, processes again) +- Cancellation during processing (Escape while LLM is running) +- Very long input handling (context window limits) +- Non-English input behaviour +- Test on machines without FoundationModels SDK (stub build) +- Test availability detection on Intel Macs / older macOS From 350c168cf4b20ed4a66cd15c4bb292a66fcbb7bb Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Wed, 25 Mar 2026 23:02:44 +0000 Subject: [PATCH 09/32] Add developer doc for Apple Intelligence quick entry processing Documents the end-to-end architecture, key components, prompt system, response parsing pipeline, frontend integration, logging, and known limitations. Intended as an evergreen reference for anyone working on or extending the AI enrichment feature. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../docs/developer/apple-intelligence.md | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 tdn-desktop/docs/developer/apple-intelligence.md diff --git a/tdn-desktop/docs/developer/apple-intelligence.md b/tdn-desktop/docs/developer/apple-intelligence.md new file mode 100644 index 00000000..f9506bc7 --- /dev/null +++ b/tdn-desktop/docs/developer/apple-intelligence.md @@ -0,0 +1,177 @@ +# Apple Intelligence Quick Entry Processing + +The quick entry pane supports AI-powered processing of free-form text input using Apple's on-device Foundation Models framework (~3B parameter model). Users type or dictate natural language and the system parses it into structured task fields. + +## Availability + +- macOS 26.0+ (Tahoe) on Apple Silicon only +- Apple Intelligence must be enabled in System Settings +- Feature is completely invisible when unavailable (no button, no shortcut) +- App weak-links FoundationModels so it launches on older macOS +- Build falls back to a stub implementation when the SDK lacks FoundationModels + +## Architecture + +``` +React (QuickPaneApp) + │ builds project/area context from loaded vault data + │ strips wikilinks from project area references + │ + ▼ +Tauri command: process_quick_entry_text() + │ builds system prompt (ai_prompts.rs) with: + │ - role + field instructions + │ - today's date + │ - area→project relationships + │ - few-shot examples + │ + ▼ +Rust FFI wrapper: apple_intelligence::process_text() + │ converts to CStrings, calls Swift, frees C memory + │ + ▼ +Swift: processTextWithSystemPrompt() + │ creates LanguageModelSession with system prompt as instructions + │ calls session.respond(to: userText, generating: ParsedTask.self) + │ ParsedTask is a @Generable struct — constrained decoding + │ falls back to plain text if @Generable fails + │ serializes to JSON, strips invisible Unicode chars + │ + ▼ +Rust: parse_ai_response() + │ strips markdown code fences (fallback path) + │ parses JSON + │ validates dates (YYYY-MM-DD or discard) + │ matches project/area names → IDs (case-insensitive exact) + │ applies body logic (preserve original text, deduplicate) + │ validates status against known values + │ + ▼ +React: populates form fields + user reviews and saves normally +``` + +## Key Components + +### Swift Bridge + +Three files in `src-tauri/swift/`: + +- `apple_intelligence.swift` — The real implementation. Contains the `@Generable ParsedTask` struct with `ParsedStatus` enum, the `LanguageModelSession` call, JSON serialization, and availability check. +- `apple_intelligence_stub.swift` — Compiled instead when the build SDK lacks FoundationModels. All functions return errors. +- `apple_intelligence_bridge.h` — C header defining the `AppleLLMResponse` struct and function signatures shared between Swift and Rust. + +The Swift code bridges async/await to synchronous C using `DispatchSemaphore` + `Task.detached`. Memory is managed manually — `strdup` for C strings, `free` on the Rust side via `free_apple_llm_response`. + +### Build Script + +`build.rs` contains `build_apple_intelligence_bridge()` (gated to macOS ARM64). It detects whether the SDK has FoundationModels, compiles the appropriate Swift file with `swiftc`, creates a static library with `libtool`, and sets up linking. Key detail: `-weak_framework FoundationModels` allows the app to launch on older macOS. + +### Rust FFI Wrapper + +`src/apple_intelligence.rs` provides safe Rust functions over the unsafe C FFI: +- `check_availability()` → `bool` +- `process_text(system_prompt, user_content, max_tokens)` → `Result` + +### Tauri Commands + +`src/commands/ai.rs` exposes two commands to the frontend: +- `check_apple_intelligence_available()` → `bool` +- `process_quick_entry_text(text, projects, areas)` → `Result` + +The command builds the system prompt, calls the FFI, parses the response, validates fields, and resolves project/area names to IDs. + +### Prompt Templates + +`src/commands/ai_prompts.rs` centralizes all prompt text. This is the primary file to edit when iterating on prompt quality. It contains: + +- `build_system_prompt()` — Assembles the complete prompt from role text, context, field instructions, and few-shot examples +- `build_context_block()` — Formats areas and their projects as a structured list +- `build_examples_block()` — Generates few-shot input→output pairs (dynamically computes "tomorrow" from today's date) + +## The @Generable Struct + +```swift +@Generable +struct ParsedTask: Sendable { + let title: String // concise task title + let body: String // extra detail, or empty string + let status: ParsedStatus // constrained enum + let due: String // YYYY-MM-DD or empty string + let scheduled: String // YYYY-MM-DD or empty string + let deferUntil: String // YYYY-MM-DD or empty string + let project: String // project name or empty string + let area: String // area name or empty string +} + +@Generable +enum ParsedStatus: Sendable { + case inbox, icebox, ready, inProgress, blocked +} +``` + +`@Generable` uses constrained decoding — the model's token generation is structurally constrained to produce valid output matching the struct. The `ParsedStatus` enum means the model literally cannot output an invalid status. + +Each field has a `@Guide(description:)` annotation providing a short hint. The system prompt carries the detailed decision-making instructions. + +Properties generate in declaration order. Later properties can be influenced by earlier ones. Title is first (most important), optional fields are last. + +## Response Parsing Pipeline + +After receiving the JSON from Swift, Rust applies several transformations: + +**Code fence stripping:** If `@Generable` fails and the fallback produces a markdown-wrapped JSON block (`` ```json...``` ``), the fences are stripped before parsing. + +**Body logic:** The raw dictated text is preserved in the body when the title was transformed (title != original input). If the AI also generated body text, it's appended only if it adds genuinely new content — checked via `is_essentially_same()` which normalises case and trailing punctuation to avoid duplication. + +**Date validation:** Each date string is parsed with `chrono::NaiveDate`. Valid YYYY-MM-DD is kept, anything else is silently discarded. + +**Project/area matching:** The model returns a name string. Rust does case-insensitive exact match against the provided list. No match → field is left empty. (Fuzzy matching is a planned improvement.) + +**Status validation:** Must be one of `inbox`, `icebox`, `ready`, `in-progress`, `blocked`. Anything else defaults to `inbox`. + +## Frontend Integration + +The sparkles button in `QuickPaneTitle` is conditionally rendered: `aiAvailable && value.trim().length > 0`. It shows a spinner during processing. + +The `Cmd+Shift+A` shortcut is registered in `useQuickPaneKeyboard` only when `onProcessWithAI` is provided (which only happens when AI is available). + +On successful processing, the handler populates all form state setters. The body section auto-expands if body content was generated. + +## Logging + +All AI processing is logged at INFO level with a clear delimiter: + +``` +── AI Quick Entry ────────────────────────────────── +Input: "Buy groceries for the week" +Raw response: {"title":"Buy groceries","body":"",... } +Mapped result: + title: "Buy groceries" + body: "Buy groceries for the week" + status: "inbox" + due: None + ... +──────────────────────────────────────────────────── +``` + +The full system prompt is logged at DEBUG level. To see it, check the Tauri dev server output. + +## Iterating on Prompts + +1. Edit `src/commands/ai_prompts.rs` — all prompt text is here +2. Restart the dev server (`bun run tauri:dev`) +3. Test with the quick pane +4. Check the server logs for raw response and mapped result +5. The system prompt appears at DEBUG level in logs + +The few-shot examples in `build_examples_block()` are the highest-impact thing to change. Keep examples distinct from likely real inputs to avoid contamination (the model copying example content into real responses). + +## Known Limitations + +- **Date arithmetic is unreliable.** The 3B model frequently gets relative date calculations wrong ("this Friday" off by days, "end of the month" wrong month). Planned fix: have the LLM extract raw date expressions and resolve them deterministically in Rust. +- **Project name matching is exact only.** "Japan Trip" won't match "Japan Trip 2025". Planned fix: fuzzy matching in Rust. +- **Few-shot contamination.** If an input is similar to a few-shot example, the model may copy fields from the example rather than generating from the actual input. +- **Body generation for complex inputs.** The model sometimes fabricates body content not present in the input. +- **`@Guide(Regex{...})` is incompatible with `.default` model.** Regex constraints cause `@Generable` to fail, falling back to plain text. Use `@Guide(description:)` only. +- **`contentTagging` adapter is wrong for this task.** It produces topic tags instead of following structured extraction instructions. Use `.default`. From 1ff6b2c97445e3e517956f85135f048cb928f8d4 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 00:10:07 +0000 Subject: [PATCH 10/32] Add step-by-step walkthrough to Apple Intelligence developer doc Adds a detailed narrative walkthrough (steps 1-8) explaining how the feature works from user action through to saved task. Covers what happens at each layer and why, alongside the existing architecture diagram and component reference. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../docs/developer/apple-intelligence.md | 64 ++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/tdn-desktop/docs/developer/apple-intelligence.md b/tdn-desktop/docs/developer/apple-intelligence.md index f9506bc7..e704ab40 100644 --- a/tdn-desktop/docs/developer/apple-intelligence.md +++ b/tdn-desktop/docs/developer/apple-intelligence.md @@ -10,7 +10,69 @@ The quick entry pane supports AI-powered processing of free-form text input usin - App weak-links FoundationModels so it launches on older macOS - Build falls back to a stub implementation when the SDK lacks FoundationModels -## Architecture +## How It Works (Step by Step) + +### 1. User opens the quick pane + +User presses `Cmd+Shift+.` (global shortcut). The quick pane React app receives a focus event and loads areas, projects, and checks AI availability in parallel. The availability check goes through Rust → Swift FFI → `SystemLanguageModel.default.availability`. If Apple Intelligence is available, the sparkles button becomes visible once the user types something. + +### 2. User types and triggers AI processing + +User types or dictates free-form text into the title field (e.g. "Email James about the Japan Trip, schedule for next Monday") and clicks the sparkles button or presses `Cmd+Shift+A`. + +The frontend builds context from the loaded vault data: each project as a name + ID + parent area name (stripping wikilink brackets from the area reference), and each area as a name + ID. This context tells the LLM what projects and areas exist so it can match against them. + +### 3. Rust builds the system prompt + +The Tauri command `process_quick_entry_text` receives the raw text and context. It gets today's date and day of week, then calls `ai_prompts::build_system_prompt()` which assembles the complete prompt from: + +- A short role statement ("You are a task field extractor...") +- Today's date and day of week +- A structured list of areas and their projects (e.g. "Acme Corp: Acme Dashboard Redesign") +- Per-field instructions explaining when to set each field and when to leave it empty +- 2-3 few-shot examples showing input text → expected JSON output, including an example where most fields are empty + +The few-shot examples are the single highest-impact part of the prompt. They teach the model the expected output format and, critically, that leaving fields empty is the right thing to do when information isn't present. + +### 4. Rust calls Swift via C FFI + +The system prompt and user text are converted to C strings and passed through the FFI boundary to Swift. The Rust side handles all memory management — it converts to `CString` for the call and frees the response via `free_apple_llm_response` afterwards. + +### 5. Swift runs on-device inference + +The Swift function creates a `LanguageModelSession` with the system prompt as `instructions` (which the model is trained to prioritise over user input). It then calls `session.respond(to: userText, generating: ParsedTask.self)`. + +`ParsedTask` is a `@Generable` struct — this is Apple's constrained decoding system. The model's token generation is structurally constrained to produce valid output matching the struct's fields. The `ParsedStatus` enum means the model literally cannot output an invalid status value. + +If `@Generable` succeeds (the normal path), the typed `ParsedTask` struct is manually serialized to a JSON string. If it fails (rare), the function falls back to a plain `session.respond()` call — the model typically returns a JSON code block in this case. + +Because the Swift call is `async` but the C FFI is synchronous, a `DispatchSemaphore` bridges the two. A detached task runs the inference, signals the semaphore on completion, and the calling thread blocks until it's done. This takes ~2-3 seconds on Apple Silicon. + +### 6. Rust parses and validates the response + +Back in Rust, `parse_ai_response()` processes the JSON string through several stages: + +**Code fence stripping:** If the fallback path produced a markdown-wrapped JSON block (`` ```json...``` ``), the fences are stripped so `serde_json` can parse it. + +**Title extraction:** The model's title is used. If JSON parsing failed entirely, the original input text becomes the title. + +**Body logic:** If the model transformed the title (it differs from the original input), the original text is preserved in the body — this ensures no context from dictation is lost. If the model also generated body text, it's only appended if it contains genuinely new information. A normalisation check (`is_essentially_same`) catches cases where the model just parrots the input back with minor punctuation changes. + +**Date validation:** Each date string is parsed with `chrono::NaiveDate`. Valid YYYY-MM-DD is kept. Empty strings become `None`. Anything else (malformed dates, random text) is silently discarded. + +**Project/area matching:** The model returns a project or area name as a string. Rust does case-insensitive exact match against the provided list of names. A match returns the entity's hash ID (which the frontend uses for the dropdown selectors). No match → the field is left empty for the user to set manually. This is a deliberate safety net — the model sometimes hallucinates project/area names that don't exist, and the exact matching silently drops them. + +**Status validation:** Must be one of `inbox`, `icebox`, `ready`, `in-progress`, `blocked`. Anything else defaults to `inbox`. + +### 7. Frontend populates the form + +The React handler receives the `ParsedQuickEntry` result and sets each piece of form state: title, body (with the body section auto-expanding if populated), status, dates, project ID, and area ID. The UI updates immediately — the user sees fields filled in and can adjust anything before saving. + +### 8. User reviews and saves + +The user presses `Cmd+Enter` to save. From here the flow is identical to a manually-entered task: `createTask` writes the file to disk with the appropriate frontmatter, and `updateTask` adds the body content. The vault index is updated and the main window receives a `task-created` event. + +## Architecture Diagram ``` React (QuickPaneApp) From ab4726490b91b547e838d559c6df60e82c6a99c9 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 00:15:45 +0000 Subject: [PATCH 11/32] Tweak task doc --- docs/tasks-todo/task-x-quick-entry-ai-processing.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 654941ee..c84f32c0 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -172,7 +172,4 @@ Start with case-insensitive substring (covers the "Japan Trip" case) and evaluat - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) -- Very long input handling (context window limits) -- Non-English input behaviour -- Test on machines without FoundationModels SDK (stub build) -- Test availability detection on Intel Macs / older macOS +- Very long input handling (context window limits?) From 9257a3f4933fb740a462d57605bbeb1c9add013d Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 00:20:00 +0000 Subject: [PATCH 12/32] Add AI evaluation harness for prompt iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a #[ignore] test that runs 13 inputs through the full AI pipeline (prompt building → Apple Intelligence → response parsing) and compares against expected outputs. Prints a per-case pass/fail summary. Run with: cargo test -p taskdn-desktop eval_ai -- --ignored --nocapture Uses fixed context (projects, areas, date) for reproducibility. Does not assert on failure — this is a development tool for measuring prompt quality, not a hard test. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 417 +++++++++++++++++++++++ 1 file changed, 417 insertions(+) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index dc13c31f..4d3f13ac 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -288,3 +288,420 @@ fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { .find(|p| p.name.eq_ignore_ascii_case(name)) .map(|p| p.id.clone()) } + +// ============================================================================= +// Evaluation Harness +// ============================================================================= +// +// A development tool for iterating on prompt quality. NOT part of the normal +// test suite — requires a live Apple Intelligence model on the device. +// +// Run with: cargo test -p taskdn-desktop eval_ai -- --ignored --nocapture +// +// Each test case sends real text through the full pipeline (prompt building → +// Apple Intelligence → response parsing) and compares against expectations. + +#[cfg(test)] +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod eval { + use super::*; + + // ── Fixed context for reproducible evaluation ──────────────────────── + + const EVAL_DATE: &str = "2026-03-25"; + const EVAL_DAY: &str = "Wednesday"; + + fn eval_projects() -> Vec { + vec![ + ProjectContext { id: "p-japan".into(), name: "Japan Trip 2025".into(), area_name: Some("Travel".into()) }, + ProjectContext { id: "p-acme".into(), name: "Acme Dashboard Redesign".into(), area_name: Some("Acme Corp".into()) }, + ProjectContext { id: "p-tax".into(), name: "Q1 Tax Preparation".into(), area_name: Some("Finance".into()) }, + ProjectContext { id: "p-blog".into(), name: "Tech Blog Relaunch".into(), area_name: Some("Writing".into()) }, + ProjectContext { id: "p-cli".into(), name: "Open Source CLI Tool".into(), area_name: Some("Coding".into()) }, + ProjectContext { id: "p-marathon".into(), name: "Half Marathon Training".into(), area_name: Some("Health".into()) }, + ProjectContext { id: "p-office".into(), name: "Home Office Setup".into(), area_name: Some("Home".into()) }, + ProjectContext { id: "p-garden".into(), name: "Garden Renovation".into(), area_name: Some("Home".into()) }, + ProjectContext { id: "p-newsletter".into(), name: "Newsletter Setup".into(), area_name: Some("Writing".into()) }, + ProjectContext { id: "p-rust".into(), name: "Learn Rust".into(), area_name: Some("Learning".into()) }, + ] + } + + fn eval_areas() -> Vec { + vec![ + NameIdPair { id: "a-travel".into(), name: "Travel".into() }, + NameIdPair { id: "a-acme".into(), name: "Acme Corp".into() }, + NameIdPair { id: "a-finance".into(), name: "Finance".into() }, + NameIdPair { id: "a-writing".into(), name: "Writing".into() }, + NameIdPair { id: "a-coding".into(), name: "Coding".into() }, + NameIdPair { id: "a-health".into(), name: "Health".into() }, + NameIdPair { id: "a-home".into(), name: "Home".into() }, + NameIdPair { id: "a-learning".into(), name: "Learning".into() }, + NameIdPair { id: "a-marketing".into(), name: "Marketing".into() }, + ] + } + + // ── Expected output specification ──────────────────────────────────── + + struct Expected { + /// Substring that must appear in the title (case-insensitive) + title_contains: &'static str, + /// Expected status value + status: &'static str, + /// Expected project ID (None = must be empty) + project: Option<&'static str>, + /// Expected area ID (None = must be empty) + area: Option<&'static str>, + /// Expected scheduled date (None = must be empty) + scheduled: Option<&'static str>, + /// Expected due date (None = must be empty) + due: Option<&'static str>, + /// Expected defer date (None = must be empty) + defer: Option<&'static str>, + /// If true, body must be empty + body_empty: bool, + } + + // ── Test runner ────────────────────────────────────────────────────── + + fn run_eval(input: &str, expected: &Expected) -> (ParsedQuickEntry, Vec) { + let projects = eval_projects(); + let areas = eval_areas(); + + let projects_with_areas: Vec = projects + .iter() + .map(|p| super::super::ai_prompts::ProjectWithArea { + name: p.name.clone(), + area_name: p.area_name.clone(), + }) + .collect(); + + let system_prompt = super::super::ai_prompts::build_system_prompt( + &projects_with_areas, + &areas, + EVAL_DATE, + EVAL_DAY, + ); + + let response = crate::apple_intelligence::process_text(&system_prompt, input, 0) + .expect("Apple Intelligence call failed"); + + let result = parse_ai_response(&response, input, &projects, &areas) + .expect("Response parsing failed"); + + let mut failures = Vec::new(); + + // Check title + if !result.title.to_lowercase().contains(&expected.title_contains.to_lowercase()) { + failures.push(format!( + "title: expected to contain {:?}, got {:?}", + expected.title_contains, result.title + )); + } + + // Check status + if result.status != expected.status { + failures.push(format!( + "status: expected {:?}, got {:?}", + expected.status, result.status + )); + } + + // Check project + match expected.project { + Some(id) => { + if result.project_id.as_deref() != Some(id) { + failures.push(format!( + "project: expected Some({:?}), got {:?}", + id, result.project_id + )); + } + } + None => { + if result.project_id.is_some() { + failures.push(format!( + "project: expected None, got {:?}", + result.project_id + )); + } + } + } + + // Check area + match expected.area { + Some(id) => { + if result.area_id.as_deref() != Some(id) { + failures.push(format!( + "area: expected Some({:?}), got {:?}", + id, result.area_id + )); + } + } + None => { + if result.area_id.is_some() { + failures.push(format!( + "area: expected None, got {:?}", + result.area_id + )); + } + } + } + + // Check dates + check_date_field("scheduled", &result.scheduled, expected.scheduled, &mut failures); + check_date_field("due", &result.due, expected.due, &mut failures); + check_date_field("defer", &result.defer_until, expected.defer, &mut failures); + + // Check body + if expected.body_empty && !result.body.is_empty() { + failures.push(format!("body: expected empty, got {:?}", result.body)); + } + + (result, failures) + } + + fn check_date_field( + name: &str, + actual: &Option, + expected: Option<&str>, + failures: &mut Vec, + ) { + match expected { + Some(date) => { + if actual.as_deref() != Some(date) { + failures.push(format!( + "{name}: expected Some({date:?}), got {actual:?}" + )); + } + } + None => { + if actual.is_some() { + failures.push(format!( + "{name}: expected None, got {actual:?}" + )); + } + } + } + } + + // ── The eval suite ─────────────────────────────────────────────────── + + #[test] + #[ignore] + fn eval_ai() { + let cases: Vec<(&str, Expected)> = vec![ + // ── Simple inputs (should leave most fields empty) ─────── + ( + "Buy groceries for the week", + Expected { + title_contains: "groceries", + status: "inbox", + project: None, + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: true, + }, + ), + ( + "Look into upgrading the database", + Expected { + title_contains: "database", + status: "inbox", + project: None, + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: true, + }, + ), + ( + "Remember to water the plants", + Expected { + title_contains: "water", + status: "inbox", + project: None, + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: true, + }, + ), + // ── Project/area matching ──────────────────────────────── + ( + "Review the Acme Dashboard Redesign mockups", + Expected { + title_contains: "mockup", + status: "inbox", + project: Some("p-acme"), + area: None, // area should come from project, not be set separately + scheduled: None, + due: None, + defer: None, + body_empty: false, + }, + ), + ( + "Write a blog post for the Tech Blog Relaunch", + Expected { + title_contains: "blog", + status: "inbox", + project: Some("p-blog"), + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: false, + }, + ), + // ── Date extraction ────────────────────────────────────── + ( + "Call the dentist tomorrow about that crown", + Expected { + title_contains: "dentist", + status: "ready", + project: None, + area: None, + scheduled: Some("2026-03-26"), + due: None, + defer: None, + body_empty: false, + }, + ), + ( + "Submit the Q1 tax return by April 15th", + Expected { + title_contains: "tax", + status: "inbox", + project: Some("p-tax"), + area: None, + scheduled: None, + due: Some("2026-04-15"), + defer: None, + body_empty: false, + }, + ), + ( + "Schedule a team meeting for this Friday", + Expected { + title_contains: "meeting", + status: "inbox", + project: None, + area: None, + scheduled: Some("2026-03-27"), + due: None, + defer: None, + body_empty: false, + }, + ), + // ── Status detection ───────────────────────────────────── + ( + "Buy milk this afternoon", + Expected { + title_contains: "milk", + status: "ready", + project: None, + area: None, + scheduled: Some("2026-03-25"), // today + due: None, + defer: None, + body_empty: false, + }, + ), + ( + "Maybe one day learn to play guitar", + Expected { + title_contains: "guitar", + status: "icebox", + project: None, + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: true, + }, + ), + ( + "The API refactor is blocked waiting on the security review", + Expected { + title_contains: "API", + status: "blocked", + project: None, + area: None, + scheduled: None, + due: None, + defer: None, + body_empty: false, + }, + ), + // ── Complex / dictation-style ──────────────────────────── + ( + "Email James about the Japan Trip, schedule for next Monday", + Expected { + title_contains: "James", + status: "inbox", + project: Some("p-japan"), + area: None, + scheduled: Some("2026-03-30"), + due: None, + defer: None, + body_empty: false, + }, + ), + ( + "Book flights by the end of next week", + Expected { + title_contains: "flight", + status: "inbox", + project: None, + area: None, + scheduled: None, + due: Some("2026-04-03"), + defer: None, + body_empty: false, + }, + ), + ]; + + println!("\n======================================================================"); + println!("AI Quick Entry Evaluation — {} cases", cases.len()); + println!("Context date: {EVAL_DATE} ({EVAL_DAY})"); + println!("======================================================================\n"); + + let mut total_pass = 0; + let mut total_fail = 0; + + for (input, expected) in &cases { + let (result, failures) = run_eval(input, expected); + + if failures.is_empty() { + total_pass += 1; + println!(" ✓ {input:?}"); + } else { + total_fail += 1; + println!(" ✗ {input:?}"); + println!(" Raw: title={:?} status={:?} project={:?} area={:?}", + result.title, result.status, result.project_id, result.area_id); + println!(" due={:?} sched={:?} defer={:?}", + result.due, result.scheduled, result.defer_until); + for f in &failures { + println!(" FAIL: {f}"); + } + } + } + + println!("\n----------------------------------------------------------------------"); + println!("Results: {total_pass} passed, {total_fail} failed out of {} cases", + cases.len()); + println!("----------------------------------------------------------------------\n"); + + // Don't assert — this is an eval tool, not a hard test. + // Some failures are expected while iterating on prompts. + if total_fail > 0 { + println!("NOTE: {total_fail} cases failed. This is expected while iterating."); + println!(" Review failures above and adjust ai_prompts.rs as needed."); + } + } +} From 27714ef85f46fef6ec8c8bff450387294f96891c Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 00:25:49 +0000 Subject: [PATCH 13/32] Expand AI eval harness to 32 test cases Adds redundancy for non-deterministic reliability testing: multiple phrasings for "tomorrow", day-of-week scheduling, "today" immediacy, and deadline language. Also adds hallucination traps where common words (health, home, learning) should NOT match area names, and partial project name matching tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 373 +++++++++++++++++------ 1 file changed, 275 insertions(+), 98 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 4d3f13ac..c46928ea 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -488,181 +488,358 @@ mod eval { #[test] #[ignore] fn eval_ai() { + // Note: EVAL_DATE is 2026-03-25, a Wednesday. + // Thu=26, Fri=27, Sat=28, Sun=29, Mon=30, Tue=31, Wed Apr 1, Thu=2, Fri=3 let cases: Vec<(&str, Expected)> = vec![ - // ── Simple inputs (should leave most fields empty) ─────── + + // ============================================================= + // SIMPLE INPUTS — no metadata expected + // ============================================================= + ( "Buy groceries for the week", Expected { - title_contains: "groceries", - status: "inbox", - project: None, - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "groceries", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, body_empty: true, }, ), ( "Look into upgrading the database", Expected { - title_contains: "database", - status: "inbox", - project: None, - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "database", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, body_empty: true, }, ), ( "Remember to water the plants", Expected { - title_contains: "water", - status: "inbox", - project: None, - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "water", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, body_empty: true, }, ), - // ── Project/area matching ──────────────────────────────── + ( + "Think about what to get mum for her birthday", + Expected { + title_contains: "mum", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, + body_empty: true, + }, + ), + + // ============================================================= + // PROJECT MATCHING — explicit project names in input + // ============================================================= + ( "Review the Acme Dashboard Redesign mockups", Expected { - title_contains: "mockup", - status: "inbox", - project: Some("p-acme"), - area: None, // area should come from project, not be set separately - scheduled: None, - due: None, - defer: None, + title_contains: "mockup", status: "inbox", + project: Some("p-acme"), area: None, + scheduled: None, due: None, defer: None, body_empty: false, }, ), ( "Write a blog post for the Tech Blog Relaunch", Expected { - title_contains: "blog", - status: "inbox", - project: Some("p-blog"), - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "blog", status: "inbox", + project: Some("p-blog"), area: None, + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + ( + "Check the Open Source CLI Tool issue tracker", + Expected { + title_contains: "CLI", status: "inbox", + project: Some("p-cli"), area: None, + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + // Partial name — "Japan Trip" should match "Japan Trip 2025" + // (currently fails with exact matching — tests fuzzy matching improvement) + ( + "Email James about the Japan Trip", + Expected { + title_contains: "James", status: "inbox", + project: Some("p-japan"), area: None, + scheduled: None, due: None, defer: None, body_empty: false, }, ), - // ── Date extraction ────────────────────────────────────── + + // ============================================================= + // AREA MATCHING — explicit area names in input + // ============================================================= + + ( + "Send the January invoice to Acme Corp", + Expected { + title_contains: "invoice", status: "inbox", + project: None, area: Some("a-acme"), + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // SCHEDULED DATES — "tomorrow" variations + // ============================================================= + ( "Call the dentist tomorrow about that crown", Expected { - title_contains: "dentist", - status: "ready", - project: None, - area: None, - scheduled: Some("2026-03-26"), - due: None, - defer: None, + title_contains: "dentist", status: "ready", + project: None, area: None, + scheduled: Some("2026-03-26"), due: None, defer: None, body_empty: false, }, ), ( - "Submit the Q1 tax return by April 15th", + "Pick up the dry cleaning tomorrow", Expected { - title_contains: "tax", - status: "inbox", - project: Some("p-tax"), - area: None, - scheduled: None, - due: Some("2026-04-15"), - defer: None, + title_contains: "dry cleaning", status: "ready", + project: None, area: None, + scheduled: Some("2026-03-26"), due: None, defer: None, body_empty: false, }, ), + ( + "Send that email to Sarah tomorrow morning", + Expected { + title_contains: "Sarah", status: "ready", + project: None, area: None, + scheduled: Some("2026-03-26"), due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // SCHEDULED DATES — "this Friday" / "next Monday" / specific days + // ============================================================= + ( "Schedule a team meeting for this Friday", Expected { - title_contains: "meeting", - status: "inbox", - project: None, - area: None, - scheduled: Some("2026-03-27"), - due: None, - defer: None, + title_contains: "meeting", status: "inbox", + project: None, area: None, + scheduled: Some("2026-03-27"), due: None, defer: None, body_empty: false, }, ), - // ── Status detection ───────────────────────────────────── + ( + "Lunch with Tom on Thursday", + Expected { + title_contains: "Tom", status: "inbox", + project: None, area: None, + scheduled: Some("2026-03-26"), due: None, defer: None, + body_empty: false, + }, + ), + ( + "Schedule the Half Marathon Training run for next Monday", + Expected { + title_contains: "run", status: "inbox", + project: Some("p-marathon"), area: None, + scheduled: Some("2026-03-30"), due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // SCHEDULED DATES — "today" / "this afternoon" + // ============================================================= + ( "Buy milk this afternoon", Expected { - title_contains: "milk", - status: "ready", - project: None, - area: None, - scheduled: Some("2026-03-25"), // today - due: None, - defer: None, + title_contains: "milk", status: "ready", + project: None, area: None, + scheduled: Some("2026-03-25"), due: None, defer: None, + body_empty: false, + }, + ), + ( + "Call the bank today about that charge", + Expected { + title_contains: "bank", status: "ready", + project: None, area: None, + scheduled: Some("2026-03-25"), due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // DUE DATES — deadline language + // ============================================================= + + ( + "Submit the Q1 tax return by April 15th", + Expected { + title_contains: "tax", status: "inbox", + project: Some("p-tax"), area: None, + scheduled: None, due: Some("2026-04-15"), defer: None, + body_empty: false, + }, + ), + ( + "The report is due by Friday", + Expected { + title_contains: "report", status: "inbox", + project: None, area: None, + scheduled: None, due: Some("2026-03-27"), defer: None, + body_empty: false, + }, + ), + ( + "Book flights by the end of next week", + Expected { + title_contains: "flight", status: "inbox", + project: None, area: None, + scheduled: None, due: Some("2026-04-03"), defer: None, + body_empty: false, + }, + ), + ( + "Renew passport, deadline is June 1st", + Expected { + title_contains: "passport", status: "inbox", + project: None, area: None, + scheduled: None, due: Some("2026-06-01"), defer: None, body_empty: false, }, ), + + // ============================================================= + // STATUS — inbox (default, no signal) + // ============================================================= + // (covered by the simple inputs above) + + // ============================================================= + // STATUS — ready (immediate action) + // ============================================================= + // (covered by "this afternoon" and "today" cases above) + + // ============================================================= + // STATUS — icebox (someday/maybe) + // ============================================================= + ( "Maybe one day learn to play guitar", Expected { - title_contains: "guitar", - status: "icebox", - project: None, - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "guitar", status: "icebox", + project: None, area: None, + scheduled: None, due: None, defer: None, body_empty: true, }, ), + ( + "I might eventually look into getting a motorbike licence", + Expected { + title_contains: "motorbike", status: "icebox", + project: None, area: None, + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // STATUS — blocked + // ============================================================= + ( "The API refactor is blocked waiting on the security review", Expected { - title_contains: "API", - status: "blocked", - project: None, - area: None, - scheduled: None, - due: None, - defer: None, + title_contains: "API", status: "blocked", + project: None, area: None, + scheduled: None, due: None, defer: None, body_empty: false, }, ), - // ── Complex / dictation-style ──────────────────────────── + ( + "Can't finish the Garden Renovation until the quote comes back", + Expected { + title_contains: "Garden", status: "blocked", + project: Some("p-garden"), area: None, + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // COMPLEX / DICTATION — multiple fields + // ============================================================= + ( "Email James about the Japan Trip, schedule for next Monday", Expected { - title_contains: "James", - status: "inbox", - project: Some("p-japan"), - area: None, - scheduled: Some("2026-03-30"), - due: None, - defer: None, + title_contains: "James", status: "inbox", + project: Some("p-japan"), area: None, + scheduled: Some("2026-03-30"), due: None, defer: None, body_empty: false, }, ), ( - "Book flights by the end of next week", + "I need to send that invoice to Acme Corp by the end of the month", Expected { - title_contains: "flight", - status: "inbox", - project: None, - area: None, - scheduled: None, - due: Some("2026-04-03"), - defer: None, + title_contains: "invoice", status: "inbox", + project: None, area: Some("a-acme"), + scheduled: None, due: Some("2026-03-31"), defer: None, body_empty: false, }, ), + ( + "Review the Newsletter Setup project tomorrow, we need to get it done before April", + Expected { + title_contains: "Newsletter", status: "inbox", + project: Some("p-newsletter"), area: None, + scheduled: Some("2026-03-26"), due: Some("2026-03-31"), defer: None, + body_empty: false, + }, + ), + + // ============================================================= + // NO HALLUCINATION — inputs that might trick the model + // ============================================================= + + // Mentions "health" but not as an area reference + ( + "Check if my health insurance covers this procedure", + Expected { + title_contains: "insurance", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, + body_empty: false, + }, + ), + // Mentions "home" but not as an area reference + ( + "Pick up something on the way home", + Expected { + title_contains: "home", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, + body_empty: true, + }, + ), + // Mentions "learning" but not as an area reference + ( + "I'm learning a lot from this course", + Expected { + title_contains: "course", status: "inbox", + project: None, area: None, + scheduled: None, due: None, defer: None, + body_empty: true, + }, + ), ]; println!("\n======================================================================"); From d37ba59e29ecfa33369cfa04d26321502839c2d4 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 00:38:06 +0000 Subject: [PATCH 14/32] Fix eval harness body expectations and make body check optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit body_empty is now Option — None means don't check. Most cases use None since body content is determined by deterministic Rust code (original text preserved when title is transformed), not by the LLM. Baseline: 11/31 passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 98 ++++++++++++------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index c46928ea..f0e3bc4e 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -357,8 +357,10 @@ mod eval { due: Option<&'static str>, /// Expected defer date (None = must be empty) defer: Option<&'static str>, - /// If true, body must be empty - body_empty: bool, + /// Body check: Some(true) = must be empty, Some(false) = must have content, None = don't check. + /// Note: body is populated by deterministic Rust code (original text preserved when title + /// is transformed), so this mostly tests our code, not the LLM. Use None for most cases. + body_empty: Option, } // ── Test runner ────────────────────────────────────────────────────── @@ -452,8 +454,14 @@ mod eval { check_date_field("defer", &result.defer_until, expected.defer, &mut failures); // Check body - if expected.body_empty && !result.body.is_empty() { - failures.push(format!("body: expected empty, got {:?}", result.body)); + match expected.body_empty { + Some(true) if !result.body.is_empty() => { + failures.push(format!("body: expected empty, got {:?}", result.body)); + } + Some(false) if result.body.is_empty() => { + failures.push("body: expected content, got empty".to_string()); + } + _ => {} // None = don't check } (result, failures) @@ -490,6 +498,11 @@ mod eval { fn eval_ai() { // Note: EVAL_DATE is 2026-03-25, a Wednesday. // Thu=26, Fri=27, Sat=28, Sun=29, Mon=30, Tue=31, Wed Apr 1, Thu=2, Fri=3 + // + // body_empty: None means "don't check body" — body content is determined by + // deterministic Rust code (if title differs from input, original text goes in + // body), so it's not testing LLM quality. Use Some(true) only when the title + // is very likely to be identical to input (i.e. input is already a clean title). let cases: Vec<(&str, Expected)> = vec![ // ============================================================= @@ -502,7 +515,7 @@ mod eval { title_contains: "groceries", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, // title may or may not be shortened }, ), ( @@ -511,7 +524,7 @@ mod eval { title_contains: "database", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), ( @@ -520,7 +533,7 @@ mod eval { title_contains: "water", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), ( @@ -529,7 +542,7 @@ mod eval { title_contains: "mum", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), @@ -543,7 +556,7 @@ mod eval { title_contains: "mockup", status: "inbox", project: Some("p-acme"), area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -552,7 +565,7 @@ mod eval { title_contains: "blog", status: "inbox", project: Some("p-blog"), area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -561,7 +574,7 @@ mod eval { title_contains: "CLI", status: "inbox", project: Some("p-cli"), area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), // Partial name — "Japan Trip" should match "Japan Trip 2025" @@ -572,7 +585,7 @@ mod eval { title_contains: "James", status: "inbox", project: Some("p-japan"), area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -586,7 +599,7 @@ mod eval { title_contains: "invoice", status: "inbox", project: None, area: Some("a-acme"), scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -600,7 +613,7 @@ mod eval { title_contains: "dentist", status: "ready", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -609,7 +622,7 @@ mod eval { title_contains: "dry cleaning", status: "ready", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -618,7 +631,7 @@ mod eval { title_contains: "Sarah", status: "ready", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -632,7 +645,7 @@ mod eval { title_contains: "meeting", status: "inbox", project: None, area: None, scheduled: Some("2026-03-27"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -641,7 +654,7 @@ mod eval { title_contains: "Tom", status: "inbox", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -650,7 +663,7 @@ mod eval { title_contains: "run", status: "inbox", project: Some("p-marathon"), area: None, scheduled: Some("2026-03-30"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -664,7 +677,7 @@ mod eval { title_contains: "milk", status: "ready", project: None, area: None, scheduled: Some("2026-03-25"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -673,7 +686,7 @@ mod eval { title_contains: "bank", status: "ready", project: None, area: None, scheduled: Some("2026-03-25"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -687,7 +700,7 @@ mod eval { title_contains: "tax", status: "inbox", project: Some("p-tax"), area: None, scheduled: None, due: Some("2026-04-15"), defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -696,7 +709,7 @@ mod eval { title_contains: "report", status: "inbox", project: None, area: None, scheduled: None, due: Some("2026-03-27"), defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -705,7 +718,7 @@ mod eval { title_contains: "flight", status: "inbox", project: None, area: None, scheduled: None, due: Some("2026-04-03"), defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -714,20 +727,10 @@ mod eval { title_contains: "passport", status: "inbox", project: None, area: None, scheduled: None, due: Some("2026-06-01"), defer: None, - body_empty: false, + body_empty: None, }, ), - // ============================================================= - // STATUS — inbox (default, no signal) - // ============================================================= - // (covered by the simple inputs above) - - // ============================================================= - // STATUS — ready (immediate action) - // ============================================================= - // (covered by "this afternoon" and "today" cases above) - // ============================================================= // STATUS — icebox (someday/maybe) // ============================================================= @@ -738,7 +741,7 @@ mod eval { title_contains: "guitar", status: "icebox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), ( @@ -747,7 +750,7 @@ mod eval { title_contains: "motorbike", status: "icebox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -761,7 +764,7 @@ mod eval { title_contains: "API", status: "blocked", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -770,7 +773,7 @@ mod eval { title_contains: "Garden", status: "blocked", project: Some("p-garden"), area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), @@ -784,7 +787,7 @@ mod eval { title_contains: "James", status: "inbox", project: Some("p-japan"), area: None, scheduled: Some("2026-03-30"), due: None, defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -793,7 +796,7 @@ mod eval { title_contains: "invoice", status: "inbox", project: None, area: Some("a-acme"), scheduled: None, due: Some("2026-03-31"), defer: None, - body_empty: false, + body_empty: None, }, ), ( @@ -802,42 +805,39 @@ mod eval { title_contains: "Newsletter", status: "inbox", project: Some("p-newsletter"), area: None, scheduled: Some("2026-03-26"), due: Some("2026-03-31"), defer: None, - body_empty: false, + body_empty: None, }, ), // ============================================================= - // NO HALLUCINATION — inputs that might trick the model + // NO HALLUCINATION — words that look like area names but aren't // ============================================================= - // Mentions "health" but not as an area reference ( "Check if my health insurance covers this procedure", Expected { title_contains: "insurance", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: false, + body_empty: None, }, ), - // Mentions "home" but not as an area reference ( "Pick up something on the way home", Expected { title_contains: "home", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), - // Mentions "learning" but not as an area reference ( "I'm learning a lot from this course", Expected { title_contains: "course", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, - body_empty: true, + body_empty: None, }, ), ]; From 90b85328d61445468ae856574116c29635c73f42 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:06:02 +0000 Subject: [PATCH 15/32] Update task doc: add deterministic status phase and auto-ready rules Phase 6 now covers removing status from the LLM entirely, replacing it with keyword-based detection and two auto-ready rules (one for all quick entry, one for AI-processed entries with near-term dates). Renumbers subsequent phases. Marks Phase 5 (eval harness) as complete. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 93 ++++++++++++------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index c84f32c0..33828e83 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -97,44 +97,75 @@ These findings are from hands-on testing and WWDC25 research. They should inform ## Next Steps -### Phase 5: Evaluation Harness - -Build a development tool for rapid prompt iteration. This is NOT part of the normal test suite — it requires a live Apple Intelligence model on the device. - -**Approach:** Rust `#[ignore]` integration test that: -- Uses the real Swift FFI bridge (same code path as production) -- Has a fixed set of ~15 test cases with input text + expected field values -- Uses fixed context (hardcoded projects, areas, date) for reproducibility -- Calls `build_system_prompt` and `process_text` directly (no Tauri/frontend) -- Outputs a per-field pass/fail summary table -- Runnable via `cargo test eval_ai -- --ignored` (or a `bun run` alias) - -**Test case structure:** -```rust -EvalCase { - input: "Email James about the Japan Trip, schedule for next Monday", - expected_title_contains: "Email James", // substring match, not exact - expected_status: "inbox", - expected_project: Some("Japan Trip 2025"), - expected_area: None, - expected_scheduled: Some("2026-03-30"), // next Monday - expected_due: None, - expected_defer: None, -} +### Phase 5: Evaluation Harness ✅ + +Done. 31 test cases in `commands/ai.rs` covering simple inputs, project/area matching, date extraction, status detection, complex dictation, and hallucination traps. Run with: + ``` +cd tdn-desktop/src-tauri && cargo test eval_ai --lib -- --ignored --nocapture +``` + +Current baseline: **11/31 passing**. Most failures are date arithmetic and project matching — addressed in Phases 7 and 8. + +### Phase 6: Deterministic Status and Auto-Ready Rules + +Remove status from Apple Intelligence entirely. Status is better handled by deterministic rules. + +**Background:** The LLM is inconsistent with status (sometimes "ready" for "tomorrow", sometimes not). The cases where it adds value (icebox, blocked) are rare and can be detected via keyword matching. Meanwhile, the most common and impactful status decision — inbox vs ready — follows clear rules based on what other fields are populated. + +**The status model:** + +1. **Default:** All tasks start as `inbox`. +2. **Keyword detection (Rust, post-AI):** Scan the original input text for explicit status language. Only match unambiguous phrases: + - `icebox` / `ice box` → `icebox` + - `blocked` / `waiting on` / `can't proceed` / `stuck on` → `blocked` + - `in progress` / `already started` / `working on` → `in-progress` + - Narrow keywords only. "Maybe" alone is NOT icebox. "Might" is NOT icebox. Only "icebox"/"ice box" and similar explicit phrases. +3. **Auto-ready Rule 1 (all quick entry, not just AI):** If status is `inbox` AND `(projectId OR areaId) is set` AND `(scheduled OR deferUntil) is set` → change to `ready`. Reasoning: a task with both a project/area and a when-to-do-it date has been "processed" — it doesn't need the inbox. +4. **Auto-ready Rule 2 (AI-processed entries only):** If status is `inbox` (keyword detection didn't set something else) AND `scheduled` date is within 7 days of today → change to `ready`. Catches "call Dave this afternoon" and "pick up laundry tomorrow" style tasks that are clearly actionable now. + +**Implementation plan:** + +**Step 1: Remove status from the LLM** +- Remove `ParsedStatus` enum and `status` field from `ParsedTask` in `apple_intelligence.swift` +- Remove `parsedTaskToJSON` status handling +- Remove status from the prompt in `ai_prompts.rs` (both field instructions and few-shot examples) +- Remove status from the `ParsedQuickEntry` response (or always return "inbox") +- This simplifies the `@Generable` struct from 8 fields to 7, giving the model more capacity for the remaining fields + +**Step 2: Keyword-based status detection in Rust** +- New function in `ai.rs`: `detect_status_from_keywords(input: &str) -> TaskStatus` +- Scans the original input text (not the AI response) for explicit status phrases +- Returns `inbox` if no keywords found +- Called during `process_quick_entry_text`, result included in `ParsedQuickEntry` +- **Write unit tests** for this function — it's deterministic and easily testable without the LLM + +**Step 3: Auto-ready Rule 1 in QuickPaneApp.tsx** +- Runs in `handleSubmit` for ALL quick entry saves (not just AI) +- Before creating the task: if status is `inbox` and the auto-ready conditions are met, change to `ready` +- This improves UX for manual quick entry too + +**Step 4: Auto-ready Rule 2 in QuickPaneApp.tsx** +- Runs only after AI processing populates fields (in `handleProcessWithAI`) +- After all fields are populated: if status is still `inbox` and scheduled is within 7 days → change to `ready` +- Only triggers when keyword detection didn't already set a different status -Field matching should be flexible: substring for titles, exact for status/dates, optional for project/area (Some = must match, None = must be empty). This lets us measure regression when changing prompts. +**Step 5: Update eval harness** +- Remove status expectations from cases where it was being tested as an LLM output +- Add new unit tests for keyword detection (deterministic, runs in normal test suite) +- Add tests for auto-ready rules +- Re-run eval harness to measure improvement from removing status from the LLM -### Phase 6: Fix Few-Shot Contamination +### Phase 7: Fix Few-Shot Contamination -Remove or redesign the third few-shot example (Q1 tax return) to avoid body contamination. Options: -- Make the example input much more distinct from likely real inputs +Remove or redesign the third few-shot example (Q1 tax return) to avoid body contamination (model copies "Gather all receipts first" from the example into real responses). Options: +- Make example inputs much more distinct from likely real inputs - Use a fictional project/area name that doesn't appear in real data - Remove body content from all examples (always show `"body":""`) -This is a quick prompt-only change in `ai_prompts.rs`, testable via the eval harness. +Quick prompt-only change in `ai_prompts.rs`, testable via the eval harness. -### Phase 7: Deterministic Date and Project/Area Resolution +### Phase 8: Deterministic Date and Project/Area Resolution Split the work into what the LLM is good at (language understanding, intent classification) and what deterministic code is good at (date arithmetic, fuzzy string matching). @@ -168,7 +199,7 @@ Add fuzzy matching in Rust alongside the existing exact match. "Japan Trip" shou Start with case-insensitive substring (covers the "Japan Trip" case) and evaluate via the harness. -### Phase 8: Polish and Edge Cases +### Phase 9: Polish and Edge Cases - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) From 3c79955f7f42bfbea6544c6c47e6c3d7ab6e10e3 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:15:10 +0000 Subject: [PATCH 16/32] Split auto-ready into cherry-pickable Phase 6 and AI-specific Phase 7 Phase 6 is now a standalone UX improvement (auto-ready when project/area + scheduled/defer set) that applies to all quick entry and can be cherry-picked independently. Phase 7 covers removing status from the LLM, keyword detection, and the AI-only near-term auto-ready rule. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 33828e83..56cf5241 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -107,54 +107,54 @@ cd tdn-desktop/src-tauri && cargo test eval_ai --lib -- --ignored --nocapture Current baseline: **11/31 passing**. Most failures are date arithmetic and project matching — addressed in Phases 7 and 8. -### Phase 6: Deterministic Status and Auto-Ready Rules +### Phase 6: Auto-Ready on Quick Entry (non-AI, cherry-pickable) -Remove status from Apple Intelligence entirely. Status is better handled by deterministic rules. +**This is a standalone UX improvement, not AI-specific.** Should be implemented as its own commit so it can be cherry-picked onto main independently of the AI feature branch. -**Background:** The LLM is inconsistent with status (sometimes "ready" for "tomorrow", sometimes not). The cases where it adds value (icebox, blocked) are rare and can be detected via keyword matching. Meanwhile, the most common and impactful status decision — inbox vs ready — follows clear rules based on what other fields are populated. +**Rule:** If a task's status is `inbox` and the user has set `(projectId OR areaId) AND (scheduled OR deferUntil)`, auto-promote to `ready`. A task with both a project/area and a when-to-do-it date has been "processed" — it doesn't need the inbox. -**The status model:** +**Implementation:** A `useEffect` in `QuickPaneApp.tsx` that: +- Watches `[projectId, areaId, scheduled, deferUntil]` (NOT `status` — avoids feedback loops) +- When conditions are met, calls `setStatus(prev => prev === 'inbox' ? 'ready' : prev)` +- Only promotes `inbox` → `ready`, never touches any other status +- If the user manually changes status back to `inbox` and then modifies another triggering field, the effect re-fires — this is correct behaviour (conditions are met again) -1. **Default:** All tasks start as `inbox`. -2. **Keyword detection (Rust, post-AI):** Scan the original input text for explicit status language. Only match unambiguous phrases: - - `icebox` / `ice box` → `icebox` - - `blocked` / `waiting on` / `can't proceed` / `stuck on` → `blocked` - - `in progress` / `already started` / `working on` → `in-progress` - - Narrow keywords only. "Maybe" alone is NOT icebox. "Might" is NOT icebox. Only "icebox"/"ice box" and similar explicit phrases. -3. **Auto-ready Rule 1 (all quick entry, not just AI):** If status is `inbox` AND `(projectId OR areaId) is set` AND `(scheduled OR deferUntil) is set` → change to `ready`. Reasoning: a task with both a project/area and a when-to-do-it date has been "processed" — it doesn't need the inbox. -4. **Auto-ready Rule 2 (AI-processed entries only):** If status is `inbox` (keyword detection didn't set something else) AND `scheduled` date is within 7 days of today → change to `ready`. Catches "call Dave this afternoon" and "pick up laundry tomorrow" style tasks that are clearly actionable now. +This is a few lines of React, zero performance concern (watches 4 state variables that change on dropdown/picker selection, not keystrokes), and gives immediate visual feedback via the status pill. -**Implementation plan:** +### Phase 7: Deterministic Status for AI Processing + +Remove status from Apple Intelligence. Status is better handled by deterministic rules. + +**Background:** The LLM is inconsistent with status (sometimes "ready" for "tomorrow", sometimes not). The cases where it adds value (icebox, blocked) are rare and can be detected via keyword matching. **Step 1: Remove status from the LLM** - Remove `ParsedStatus` enum and `status` field from `ParsedTask` in `apple_intelligence.swift` - Remove `parsedTaskToJSON` status handling - Remove status from the prompt in `ai_prompts.rs` (both field instructions and few-shot examples) -- Remove status from the `ParsedQuickEntry` response (or always return "inbox") -- This simplifies the `@Generable` struct from 8 fields to 7, giving the model more capacity for the remaining fields +- Always return `inbox` as status from `ParsedQuickEntry` +- This simplifies the `@Generable` struct from 8 fields to 7, giving the model more capacity **Step 2: Keyword-based status detection in Rust** -- New function in `ai.rs`: `detect_status_from_keywords(input: &str) -> TaskStatus` -- Scans the original input text (not the AI response) for explicit status phrases +- New function in `ai.rs`: `detect_status_from_keywords(input: &str) -> &str` +- Scans the original input text (not the AI response) for explicit, unambiguous status phrases: + - `icebox` / `ice box` → `icebox` + - `blocked` / `waiting on` / `can't proceed` / `stuck on` → `blocked` + - `in progress` / `already started` / `working on` → `in-progress` +- Narrow keywords only. "Maybe" alone is NOT icebox. "Might" is NOT icebox. - Returns `inbox` if no keywords found -- Called during `process_quick_entry_text`, result included in `ParsedQuickEntry` -- **Write unit tests** for this function — it's deterministic and easily testable without the LLM - -**Step 3: Auto-ready Rule 1 in QuickPaneApp.tsx** -- Runs in `handleSubmit` for ALL quick entry saves (not just AI) -- Before creating the task: if status is `inbox` and the auto-ready conditions are met, change to `ready` -- This improves UX for manual quick entry too - -**Step 4: Auto-ready Rule 2 in QuickPaneApp.tsx** -- Runs only after AI processing populates fields (in `handleProcessWithAI`) -- After all fields are populated: if status is still `inbox` and scheduled is within 7 days → change to `ready` -- Only triggers when keyword detection didn't already set a different status - -**Step 5: Update eval harness** -- Remove status expectations from cases where it was being tested as an LLM output -- Add new unit tests for keyword detection (deterministic, runs in normal test suite) -- Add tests for auto-ready rules -- Re-run eval harness to measure improvement from removing status from the LLM +- **Write unit tests** — deterministic, part of the normal test suite + +**Step 3: Auto-ready Rule 2 (AI only, near-term scheduled)** +- In `handleProcessWithAI` in `QuickPaneApp.tsx`, after all AI fields are populated +- If status is still `inbox` (keyword detection didn't override) AND `scheduled` is within 7 days of today → set to `ready` +- Catches "call Dave this afternoon" and "pick up laundry tomorrow" +- Note: Phase 6's Rule 1 (useEffect) will also fire if project/area + dates are set, so both rules complement each other + +**Step 4: Update eval harness and tests** +- Remove status expectations from eval cases where status was tested as LLM output +- Eval harness should test the *final* status after keyword detection + auto-ready rules, not the raw LLM output +- Add unit tests for `detect_status_from_keywords` +- Re-run eval harness to measure improvement ### Phase 7: Fix Few-Shot Contamination From 92716dd96f591c08d95ffcf2f1222703038bebc1 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:16:49 +0000 Subject: [PATCH 17/32] =?UTF-8?q?Auto-promote=20inbox=20=E2=86=92=20ready?= =?UTF-8?q?=20when=20task=20has=20project/area=20+=20scheduled/defer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a useEffect to the quick entry pane that watches projectId, areaId, scheduled, and deferUntil. When a task has (project or area) AND (scheduled or defer-until) set, and status is still inbox, it auto-promotes to ready — the task has enough context that it doesn't need manual inbox processing. This applies to all quick entry (manual and AI-assisted) and gives immediate visual feedback via the status pill. Only promotes inbox → ready, never touches other statuses. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/components/quick-pane/QuickPaneApp.tsx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 0384c936..dabd1186 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -148,6 +148,21 @@ export default function QuickPaneApp() { setRestoreFocusTo(null) }, []) + // ───────────────────────────────────────────────────────────────────────── + // Auto-Ready: promote inbox → ready when task appears "processed" + // A task with (project or area) AND (scheduled or defer-until) has enough + // context that it doesn't need to sit in the inbox for manual processing. + // ───────────────────────────────────────────────────────────────────────── + + React.useEffect(() => { + const hasProjectOrArea = projectId !== null || areaId !== null + const hasScheduleOrDefer = scheduled !== null || deferUntil !== null + + if (hasProjectOrArea && hasScheduleOrDefer) { + setStatus(prev => (prev === 'inbox' ? 'ready' : prev)) + } + }, [projectId, areaId, scheduled, deferUntil]) + // ───────────────────────────────────────────────────────────────────────── // Dismiss with Animation // ───────────────────────────────────────────────────────────────────────── From 0fc3589854b96b49002ad444a02134698cc30394 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:25:34 +0000 Subject: [PATCH 18/32] Remove status from LLM, add keyword detection and near-term auto-ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes status entirely from the Apple Intelligence @Generable struct and prompt — the ~3B model was inconsistent with status and this simplifies its job (7 fields instead of 8). Status is now determined by: 1. Keyword detection in Rust (scans input for "blocked", "icebox", "in progress" etc.) — narrow, unambiguous matches only 2. Auto-ready Rule 2 in frontend (AI only): if scheduled date is within 7 days and status is inbox, promote to ready Also fixes few-shot contamination by replacing the Q1 tax return example (which was leaking "Gather all receipts first" into real responses) with a Newsletter Setup example. Adds 6 unit tests for keyword detection (deterministic, runs in normal test suite). Updates eval harness status expectations. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 22 ++- tdn-desktop/src-tauri/src/commands/ai.rs | 131 +++++++++++++++--- .../src-tauri/src/commands/ai_prompts.rs | 18 +-- .../src-tauri/swift/apple_intelligence.swift | 26 ---- .../components/quick-pane/QuickPaneApp.tsx | 17 ++- 5 files changed, 140 insertions(+), 74 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 56cf5241..a94cfda4 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -107,19 +107,9 @@ cd tdn-desktop/src-tauri && cargo test eval_ai --lib -- --ignored --nocapture Current baseline: **11/31 passing**. Most failures are date arithmetic and project matching — addressed in Phases 7 and 8. -### Phase 6: Auto-Ready on Quick Entry (non-AI, cherry-pickable) +### Phase 6: Auto-Ready on Quick Entry (non-AI, cherry-pickable) ✅ -**This is a standalone UX improvement, not AI-specific.** Should be implemented as its own commit so it can be cherry-picked onto main independently of the AI feature branch. - -**Rule:** If a task's status is `inbox` and the user has set `(projectId OR areaId) AND (scheduled OR deferUntil)`, auto-promote to `ready`. A task with both a project/area and a when-to-do-it date has been "processed" — it doesn't need the inbox. - -**Implementation:** A `useEffect` in `QuickPaneApp.tsx` that: -- Watches `[projectId, areaId, scheduled, deferUntil]` (NOT `status` — avoids feedback loops) -- When conditions are met, calls `setStatus(prev => prev === 'inbox' ? 'ready' : prev)` -- Only promotes `inbox` → `ready`, never touches any other status -- If the user manually changes status back to `inbox` and then modifies another triggering field, the effect re-fires — this is correct behaviour (conditions are met again) - -This is a few lines of React, zero performance concern (watches 4 state variables that change on dropdown/picker selection, not keystrokes), and gives immediate visual feedback via the status pill. +Done. `useEffect` in `QuickPaneApp.tsx` watches `[projectId, areaId, scheduled, deferUntil]` and promotes `inbox` → `ready` when `(project OR area) AND (scheduled OR defer)` are set. Standalone commit, cherry-pickable. ### Phase 7: Deterministic Status for AI Processing @@ -204,3 +194,11 @@ Start with case-insensitive substring (covers the "Japan Trip" case) and evaluat - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) - Very long input handling (context window limits?) + +### Phase 10: Docs + +- Update develper quick-entry pane docs as needed +- Update userguide page on Quick Entry pane to mention + A) Auto-setting of status to Ready when (project || area ) && (scheduled || defer-until) are set. + B) Basic explanation of how the sparkle button works and what it's for, and when it's available. +- Update apple-intelligence.md developer doc as needed so it's accurate about how things currently work. Include a brief mention of how to use the eval test to iterate on prompts etc. diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index f0e3bc4e..e37ff51e 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -95,7 +95,10 @@ pub fn process_quick_entry_text( log::info!("Raw response: {response}"); - let result = parse_ai_response(&response, trimmed, &projects, &areas)?; + let mut result = parse_ai_response(&response, trimmed, &projects, &areas)?; + + // Determine status via keyword detection (not LLM) + result.status = detect_status_from_keywords(trimmed).to_string(); log::info!("Mapped result:"); log::info!(" title: {:?}", result.title); @@ -183,17 +186,8 @@ fn parse_ai_response( } }; - let status = parsed["status"] - .as_str() - .unwrap_or("inbox") - .trim() - .to_string(); - - // Validate status is a known value - let status = match status.as_str() { - "inbox" | "icebox" | "ready" | "in-progress" | "blocked" => status, - _ => "inbox".to_string(), - }; + // Status is determined by keyword detection, not the LLM + let status = "inbox".to_string(); let due = non_empty_date(parsed["due"].as_str()); let scheduled = non_empty_date(parsed["scheduled"].as_str()); @@ -289,6 +283,98 @@ fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { .map(|p| p.id.clone()) } +// ============================================================================= +// Keyword-Based Status Detection +// ============================================================================= + +/// Detect task status from explicit keywords in the input text. +/// Only matches unambiguous, explicit status language. Returns "inbox" by default. +/// +/// This is intentionally narrow — false negatives (missing an icebox intent) are +/// harmless since the user can change the status dropdown in half a second. +/// False positives (wrongly setting blocked/icebox) are more disruptive. +pub fn detect_status_from_keywords(input: &str) -> &'static str { + let lower = input.to_lowercase(); + + // Check for blocked — explicit blocking language + if lower.contains("blocked") + || lower.contains("waiting on") + || lower.contains("can't proceed") + || lower.contains("stuck on") + { + return "blocked"; + } + + // Check for icebox — only very explicit icebox/ice box mentions + if lower.contains("icebox") || lower.contains("ice box") { + return "icebox"; + } + + // Check for in-progress — explicit "already doing" language + if lower.contains("in progress") + || lower.contains("in-progress") + || lower.contains("already started") + || lower.contains("working on") + { + return "in-progress"; + } + + "inbox" +} + +// ============================================================================= +// Unit Tests (deterministic, runs in normal test suite) +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn keyword_default_is_inbox() { + assert_eq!(detect_status_from_keywords("Buy groceries"), "inbox"); + assert_eq!(detect_status_from_keywords("Call the dentist tomorrow"), "inbox"); + assert_eq!(detect_status_from_keywords("Review the mockups"), "inbox"); + } + + #[test] + fn keyword_detects_blocked() { + assert_eq!(detect_status_from_keywords("This is blocked by the security review"), "blocked"); + assert_eq!(detect_status_from_keywords("Waiting on the client to respond"), "blocked"); + assert_eq!(detect_status_from_keywords("Can't proceed until we get approval"), "blocked"); + assert_eq!(detect_status_from_keywords("Stuck on the API migration"), "blocked"); + } + + #[test] + fn keyword_detects_icebox() { + assert_eq!(detect_status_from_keywords("Icebox task to learn piano"), "icebox"); + assert_eq!(detect_status_from_keywords("Put this in the ice box"), "icebox"); + } + + #[test] + fn keyword_icebox_is_narrow() { + // "maybe" and "might" alone should NOT trigger icebox + assert_eq!(detect_status_from_keywords("Maybe call the bank"), "inbox"); + assert_eq!(detect_status_from_keywords("I might need to do this"), "inbox"); + assert_eq!(detect_status_from_keywords("One day learn guitar"), "inbox"); + assert_eq!(detect_status_from_keywords("Eventually get around to it"), "inbox"); + } + + #[test] + fn keyword_detects_in_progress() { + assert_eq!(detect_status_from_keywords("This is in progress"), "in-progress"); + assert_eq!(detect_status_from_keywords("Already started the refactor"), "in-progress"); + assert_eq!(detect_status_from_keywords("Working on the dashboard"), "in-progress"); + } + + #[test] + fn keyword_case_insensitive() { + assert_eq!(detect_status_from_keywords("This is BLOCKED"), "blocked"); + assert_eq!(detect_status_from_keywords("ICEBOX this task"), "icebox"); + assert_eq!(detect_status_from_keywords("IN PROGRESS refactor"), "in-progress"); + } +} + // ============================================================================= // Evaluation Harness // ============================================================================= @@ -387,9 +473,12 @@ mod eval { let response = crate::apple_intelligence::process_text(&system_prompt, input, 0) .expect("Apple Intelligence call failed"); - let result = parse_ai_response(&response, input, &projects, &areas) + let mut result = parse_ai_response(&response, input, &projects, &areas) .expect("Response parsing failed"); + // Apply keyword detection (same as production code path) + result.status = detect_status_from_keywords(input).to_string(); + let mut failures = Vec::new(); // Check title @@ -610,7 +699,7 @@ mod eval { ( "Call the dentist tomorrow about that crown", Expected { - title_contains: "dentist", status: "ready", + title_contains: "dentist", status: "inbox", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, body_empty: None, @@ -619,7 +708,7 @@ mod eval { ( "Pick up the dry cleaning tomorrow", Expected { - title_contains: "dry cleaning", status: "ready", + title_contains: "dry cleaning", status: "inbox", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, body_empty: None, @@ -628,7 +717,7 @@ mod eval { ( "Send that email to Sarah tomorrow morning", Expected { - title_contains: "Sarah", status: "ready", + title_contains: "Sarah", status: "inbox", project: None, area: None, scheduled: Some("2026-03-26"), due: None, defer: None, body_empty: None, @@ -674,7 +763,7 @@ mod eval { ( "Buy milk this afternoon", Expected { - title_contains: "milk", status: "ready", + title_contains: "milk", status: "inbox", project: None, area: None, scheduled: Some("2026-03-25"), due: None, defer: None, body_empty: None, @@ -683,7 +772,7 @@ mod eval { ( "Call the bank today about that charge", Expected { - title_contains: "bank", status: "ready", + title_contains: "bank", status: "inbox", project: None, area: None, scheduled: Some("2026-03-25"), due: None, defer: None, body_empty: None, @@ -738,7 +827,7 @@ mod eval { ( "Maybe one day learn to play guitar", Expected { - title_contains: "guitar", status: "icebox", + title_contains: "guitar", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, body_empty: None, @@ -747,7 +836,7 @@ mod eval { ( "I might eventually look into getting a motorbike licence", Expected { - title_contains: "motorbike", status: "icebox", + title_contains: "motorbike", status: "inbox", project: None, area: None, scheduled: None, due: None, defer: None, body_empty: None, @@ -770,7 +859,7 @@ mod eval { ( "Can't finish the Garden Renovation until the quote comes back", Expected { - title_contains: "Garden", status: "blocked", + title_contains: "Garden", status: "inbox", // no explicit "blocked" keyword project: Some("p-garden"), area: None, scheduled: None, due: None, defer: None, body_empty: None, diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index a73826fd..2470ee12 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -51,12 +51,6 @@ title: Rewrite the input as a concise, actionable task title. body: Include only if the input has meaningful detail beyond what the title captures. \ Otherwise empty string. -status: Use 'inbox' unless the input clearly indicates otherwise. \ -Use 'ready' only for explicit immediacy ('today', 'this afternoon', 'right now'). \ -Use 'blocked' only if the input says something is blocked or waiting. \ -Use 'icebox' only for explicit maybe/someday language. \ -Use 'inProgress' only if the input says work has already started. - project: Set only if the input explicitly names a project from the list above. \ Empty string if no project is mentioned by name. @@ -88,17 +82,17 @@ fn build_examples_block(today: &str) -> String { "Examples:\n\ \n\ Input: \"Buy groceries for the week\"\n\ - Output: {{\"title\":\"Buy groceries\",\"body\":\"\",\"status\":\"inbox\",\ + Output: {{\"title\":\"Buy groceries\",\"body\":\"\",\ \"due\":\"\",\"scheduled\":\"\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ \n\ Input: \"Call the dentist tomorrow about that crown\"\n\ - Output: {{\"title\":\"Call dentist about crown\",\"body\":\"\",\"status\":\"ready\",\ + Output: {{\"title\":\"Call dentist about crown\",\"body\":\"\",\ \"due\":\"\",\"scheduled\":\"{tomorrow}\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ \n\ - Input: \"I need to submit the Q1 tax return by April 15th, gather all the receipts first\"\n\ - Output: {{\"title\":\"Submit Q1 tax return\",\"body\":\"Gather all receipts first.\",\ - \"status\":\"inbox\",\"due\":\"2026-04-15\",\"scheduled\":\"\",\"deferUntil\":\"\",\ - \"project\":\"Q1 Tax Preparation\",\"area\":\"\"}}" + Input: \"Finish the Newsletter Setup landing page by end of March\"\n\ + Output: {{\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\ + \"due\":\"2026-03-31\",\"scheduled\":\"\",\"deferUntil\":\"\",\ + \"project\":\"Newsletter Setup\",\"area\":\"\"}}" ) } diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 9ad9dfb7..0f1980b7 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -12,8 +12,6 @@ private struct ParsedTask: Sendable { @Guide(description: "Extra detail, or empty string") let body: String - let status: ParsedStatus - @Guide(description: "YYYY-MM-DD or empty string") let due: String @@ -30,16 +28,6 @@ private struct ParsedTask: Sendable { let area: String } -@available(macOS 26.0, *) -@Generable -private enum ParsedStatus: Sendable { - case inbox - case icebox - case ready - case inProgress - case blocked -} - // MARK: - Helpers private typealias ResponsePointer = UnsafeMutablePointer @@ -59,19 +47,6 @@ private func stripInvisibleChars(_ text: String) -> String { .replacingOccurrences(of: "\u{FEFF}", with: "") // BOM } -// MARK: - Convert ParsedStatus to string - -@available(macOS 26.0, *) -private func statusToString(_ status: ParsedStatus) -> String { - switch status { - case .inbox: return "inbox" - case .icebox: return "icebox" - case .ready: return "ready" - case .inProgress: return "in-progress" - case .blocked: return "blocked" - } -} - // MARK: - Convert ParsedTask to JSON string @available(macOS 26.0, *) @@ -80,7 +55,6 @@ private func parsedTaskToJSON(_ task: ParsedTask) -> String { let fields: [(String, String)] = [ ("title", task.title), ("body", task.body), - ("status", statusToString(task.status)), ("due", task.due), ("scheduled", task.scheduled), ("deferUntil", task.deferUntil), diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index dabd1186..29ffecaf 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -295,15 +295,13 @@ export default function QuickPaneApp() { setShowBody(true) } - // Map status string to TaskStatus + // Set status from keyword detection (Rust handles this, not the LLM) const validStatuses: TaskStatus[] = [ 'inbox', 'icebox', 'ready', 'in-progress', 'blocked', - 'dropped', - 'done', ] if (validStatuses.includes(parsed.status as TaskStatus)) { setStatus(parsed.status as TaskStatus) @@ -315,6 +313,19 @@ export default function QuickPaneApp() { if (parsed.projectId) setProjectId(parsed.projectId) if (parsed.areaId) setAreaId(parsed.areaId) + // Auto-ready Rule 2 (AI only): if scheduled within 7 days and status + // is still inbox (keyword detection didn't override), promote to ready. + if (parsed.status === 'inbox' && parsed.scheduled) { + const scheduledDate = new Date(parsed.scheduled + 'T00:00:00') + const now = new Date() + const daysUntil = Math.floor( + (scheduledDate.getTime() - now.getTime()) / (1000 * 60 * 60 * 24) + ) + if (daysUntil >= 0 && daysUntil <= 7) { + setStatus('ready') + } + } + logger.info('AI processing complete') } catch (error) { logger.error('Unexpected error during AI processing', { error }) From 4806aad2d84f2cde17cc5432726f5798c938add2 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:33:06 +0000 Subject: [PATCH 19/32] Tighten status keyword detection to reduce false positives Remove "already started", "working on", "stuck on", "can't proceed" as these are less explicit and could match unintended inputs. Add fat-finger variants: "waitingon", "inprogress", "ice-box". Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 36 ++++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index e37ff51e..dc0bc1e9 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -299,22 +299,23 @@ pub fn detect_status_from_keywords(input: &str) -> &'static str { // Check for blocked — explicit blocking language if lower.contains("blocked") || lower.contains("waiting on") - || lower.contains("can't proceed") - || lower.contains("stuck on") + || lower.contains("waitingon") { return "blocked"; } - // Check for icebox — only very explicit icebox/ice box mentions - if lower.contains("icebox") || lower.contains("ice box") { + // Check for icebox — only very explicit mentions + if lower.contains("icebox") + || lower.contains("ice box") + || lower.contains("ice-box") + { return "icebox"; } - // Check for in-progress — explicit "already doing" language + // Check for in-progress — explicit mentions only if lower.contains("in progress") || lower.contains("in-progress") - || lower.contains("already started") - || lower.contains("working on") + || lower.contains("inprogress") { return "in-progress"; } @@ -341,19 +342,24 @@ mod tests { fn keyword_detects_blocked() { assert_eq!(detect_status_from_keywords("This is blocked by the security review"), "blocked"); assert_eq!(detect_status_from_keywords("Waiting on the client to respond"), "blocked"); - assert_eq!(detect_status_from_keywords("Can't proceed until we get approval"), "blocked"); - assert_eq!(detect_status_from_keywords("Stuck on the API migration"), "blocked"); + assert_eq!(detect_status_from_keywords("waitingon client response"), "blocked"); + } + + #[test] + fn keyword_blocked_is_narrow() { + assert_eq!(detect_status_from_keywords("Can't proceed until we get approval"), "inbox"); + assert_eq!(detect_status_from_keywords("Stuck on the API migration"), "inbox"); } #[test] fn keyword_detects_icebox() { assert_eq!(detect_status_from_keywords("Icebox task to learn piano"), "icebox"); assert_eq!(detect_status_from_keywords("Put this in the ice box"), "icebox"); + assert_eq!(detect_status_from_keywords("ice-box this for later"), "icebox"); } #[test] fn keyword_icebox_is_narrow() { - // "maybe" and "might" alone should NOT trigger icebox assert_eq!(detect_status_from_keywords("Maybe call the bank"), "inbox"); assert_eq!(detect_status_from_keywords("I might need to do this"), "inbox"); assert_eq!(detect_status_from_keywords("One day learn guitar"), "inbox"); @@ -363,8 +369,14 @@ mod tests { #[test] fn keyword_detects_in_progress() { assert_eq!(detect_status_from_keywords("This is in progress"), "in-progress"); - assert_eq!(detect_status_from_keywords("Already started the refactor"), "in-progress"); - assert_eq!(detect_status_from_keywords("Working on the dashboard"), "in-progress"); + assert_eq!(detect_status_from_keywords("Mark as in-progress"), "in-progress"); + assert_eq!(detect_status_from_keywords("inprogress task"), "in-progress"); + } + + #[test] + fn keyword_in_progress_is_narrow() { + assert_eq!(detect_status_from_keywords("Already started the refactor"), "inbox"); + assert_eq!(detect_status_from_keywords("Working on the dashboard"), "inbox"); } #[test] From 4aa065d0c2440af55d2d59ad281bef7377bfd5b7 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:35:44 +0000 Subject: [PATCH 20/32] Mark Phases 6-7 complete, remove duplicate phase, renumber Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 51 +++---------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index a94cfda4..4e277cb5 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -111,49 +111,14 @@ Current baseline: **11/31 passing**. Most failures are date arithmetic and proje Done. `useEffect` in `QuickPaneApp.tsx` watches `[projectId, areaId, scheduled, deferUntil]` and promotes `inbox` → `ready` when `(project OR area) AND (scheduled OR defer)` are set. Standalone commit, cherry-pickable. -### Phase 7: Deterministic Status for AI Processing - -Remove status from Apple Intelligence. Status is better handled by deterministic rules. - -**Background:** The LLM is inconsistent with status (sometimes "ready" for "tomorrow", sometimes not). The cases where it adds value (icebox, blocked) are rare and can be detected via keyword matching. - -**Step 1: Remove status from the LLM** -- Remove `ParsedStatus` enum and `status` field from `ParsedTask` in `apple_intelligence.swift` -- Remove `parsedTaskToJSON` status handling -- Remove status from the prompt in `ai_prompts.rs` (both field instructions and few-shot examples) -- Always return `inbox` as status from `ParsedQuickEntry` -- This simplifies the `@Generable` struct from 8 fields to 7, giving the model more capacity - -**Step 2: Keyword-based status detection in Rust** -- New function in `ai.rs`: `detect_status_from_keywords(input: &str) -> &str` -- Scans the original input text (not the AI response) for explicit, unambiguous status phrases: - - `icebox` / `ice box` → `icebox` - - `blocked` / `waiting on` / `can't proceed` / `stuck on` → `blocked` - - `in progress` / `already started` / `working on` → `in-progress` -- Narrow keywords only. "Maybe" alone is NOT icebox. "Might" is NOT icebox. -- Returns `inbox` if no keywords found -- **Write unit tests** — deterministic, part of the normal test suite - -**Step 3: Auto-ready Rule 2 (AI only, near-term scheduled)** -- In `handleProcessWithAI` in `QuickPaneApp.tsx`, after all AI fields are populated -- If status is still `inbox` (keyword detection didn't override) AND `scheduled` is within 7 days of today → set to `ready` -- Catches "call Dave this afternoon" and "pick up laundry tomorrow" -- Note: Phase 6's Rule 1 (useEffect) will also fire if project/area + dates are set, so both rules complement each other - -**Step 4: Update eval harness and tests** -- Remove status expectations from eval cases where status was tested as LLM output -- Eval harness should test the *final* status after keyword detection + auto-ready rules, not the raw LLM output -- Add unit tests for `detect_status_from_keywords` -- Re-run eval harness to measure improvement - -### Phase 7: Fix Few-Shot Contamination - -Remove or redesign the third few-shot example (Q1 tax return) to avoid body contamination (model copies "Gather all receipts first" from the example into real responses). Options: -- Make example inputs much more distinct from likely real inputs -- Use a fictional project/area name that doesn't appear in real data -- Remove body content from all examples (always show `"body":""`) - -Quick prompt-only change in `ai_prompts.rs`, testable via the eval harness. +### Phase 7: Deterministic Status for AI Processing ✅ + +Done. Status removed from `@Generable` struct (8→7 fields) and prompt. Status now determined by: +- Keyword detection in Rust: `blocked` / `waiting on` / `waitingon` → blocked; `icebox` / `ice box` / `ice-box` → icebox; `in progress` / `in-progress` / `inprogress` → in-progress; everything else → inbox +- Auto-ready Rule 2 in frontend: if AI sets scheduled within 7 days and status is inbox → ready +- 8 unit tests for keyword detection in the normal test suite + +Also fixed few-shot contamination: replaced Q1 tax return example (which leaked "Gather all receipts first" into responses) with Newsletter Setup example. ### Phase 8: Deterministic Date and Project/Area Resolution From 940e9b43f9922fd58acd212151d1889682a29f09 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 01:44:47 +0000 Subject: [PATCH 21/32] Fix auto-ready Rule 2 timezone bug for same-day scheduled dates Comparing scheduled date (midnight) against current time (e.g. 2 AM) gave a negative daysUntil, preventing promotion to ready for tasks scheduled today. Now compares date-only strings via getTodayISO() so time-of-day doesn't affect the calculation. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 29ffecaf..935f2889 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -315,11 +315,13 @@ export default function QuickPaneApp() { // Auto-ready Rule 2 (AI only): if scheduled within 7 days and status // is still inbox (keyword detection didn't override), promote to ready. + // Compare date strings (YYYY-MM-DD) to avoid time-of-day issues. if (parsed.status === 'inbox' && parsed.scheduled) { + const todayStr = getTodayISO() + const todayDate = new Date(todayStr + 'T00:00:00') const scheduledDate = new Date(parsed.scheduled + 'T00:00:00') - const now = new Date() - const daysUntil = Math.floor( - (scheduledDate.getTime() - now.getTime()) / (1000 * 60 * 60 * 24) + const daysUntil = Math.round( + (scheduledDate.getTime() - todayDate.getTime()) / (1000 * 60 * 60 * 24) ) if (daysUntil >= 0 && daysUntil <= 7) { setStatus('ready') From df686b1646290b234d8d3d5c108f18a9b132b8ee Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 02:24:08 +0000 Subject: [PATCH 22/32] Add deterministic date resolution and fuzzy project/area matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes the LLM from computing YYYY-MM-DD dates to extracting raw date expressions ("tomorrow", "next Monday", "end of March"). Rust resolves them deterministically via the fuzzydate crate, with custom handlers for "end of [month]", "in N weeks", ordinal suffixes, and "on/by [day]" patterns. Adds fuzzy project/area matching using case-insensitive substring with a 3-char minimum to prevent spurious short matches. "Japan Trip" now matches "Japan Trip 2025". 19 unit tests for the resolver/matcher (deterministic, normal suite). Eval harness: 11/31 → 16/31 passing. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/Cargo.lock | 28 +- tdn-desktop/src-tauri/Cargo.toml | 1 + tdn-desktop/src-tauri/src/commands/ai.rs | 65 +-- .../src-tauri/src/commands/ai_prompts.rs | 55 +-- .../src-tauri/src/commands/ai_resolve.rs | 458 ++++++++++++++++++ tdn-desktop/src-tauri/src/commands/mod.rs | 1 + .../src-tauri/swift/apple_intelligence.swift | 18 +- 7 files changed, 528 insertions(+), 98 deletions(-) create mode 100644 tdn-desktop/src-tauri/src/commands/ai_resolve.rs diff --git a/tdn-desktop/src-tauri/Cargo.lock b/tdn-desktop/src-tauri/Cargo.lock index e7ae60dd..57ce1c2c 100644 --- a/tdn-desktop/src-tauri/Cargo.lock +++ b/tdn-desktop/src-tauri/Cargo.lock @@ -124,7 +124,7 @@ dependencies = [ "objc2-foundation", "parking_lot", "percent-encoding", - "windows-sys 0.59.0", + "windows-sys 0.60.2", "wl-clipboard-rs", "x11rb", ] @@ -964,7 +964,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1175,7 +1175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1460,6 +1460,17 @@ dependencies = [ "slab", ] +[[package]] +name = "fuzzydate" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51ddfc7f800df80b58d70db49cd96a1567c372c65bc8e4fa1ba728a9741426a2" +dependencies = [ + "chrono", + "lazy_static", + "thiserror 2.0.17", +] + [[package]] name = "fxhash" version = "0.2.1" @@ -3090,7 +3101,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d8fae84b431384b68627d0f9b3b1245fcf9f46f6c0e3dc902e9dce64edd1967" dependencies = [ "libc", - "windows-sys 0.45.0", + "windows-sys 0.61.2", ] [[package]] @@ -3615,7 +3626,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -4013,7 +4024,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4721,6 +4732,7 @@ version = "0.1.2" dependencies = [ "chrono", "dirs", + "fuzzydate", "globset", "gray_matter", "log", @@ -5299,7 +5311,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -6218,7 +6230,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/tdn-desktop/src-tauri/Cargo.toml b/tdn-desktop/src-tauri/Cargo.toml index df61707c..759bd5d7 100644 --- a/tdn-desktop/src-tauri/Cargo.toml +++ b/tdn-desktop/src-tauri/Cargo.toml @@ -54,6 +54,7 @@ specta = { version = "=2.0.0-rc.22", features = ["derive", "serde_json"] } tauri-specta = { version = "=2.0.0-rc.21", features = ["typescript"] } specta-typescript = "=0.0.9" tauri-plugin-deep-link = "2" +fuzzydate = "0.4.0" [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies] tauri-plugin-single-instance = "2" diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index dc0bc1e9..4067f517 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -95,7 +95,7 @@ pub fn process_quick_entry_text( log::info!("Raw response: {response}"); - let mut result = parse_ai_response(&response, trimmed, &projects, &areas)?; + let mut result = parse_ai_response(&response, trimmed, &projects, &areas, today.date_naive())?; // Determine status via keyword detection (not LLM) result.status = detect_status_from_keywords(trimmed).to_string(); @@ -146,12 +146,14 @@ fn strip_code_fences(s: &str) -> &str { } /// Parse the AI response JSON into a `ParsedQuickEntry`, resolving project/area names to IDs. +/// `today` is used for resolving relative date expressions. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn parse_ai_response( response: &str, original_text: &str, projects: &[ProjectContext], areas: &[NameIdPair], + today: chrono::NaiveDate, ) -> Result { // Try to parse as JSON (structured output from @Generable). // Also handles fallback where model returns JSON wrapped in markdown code fences. @@ -189,17 +191,21 @@ fn parse_ai_response( // Status is determined by keyword detection, not the LLM let status = "inbox".to_string(); - let due = non_empty_date(parsed["due"].as_str()); - let scheduled = non_empty_date(parsed["scheduled"].as_str()); - let defer_until = non_empty_date(parsed["deferUntil"].as_str()); + // Resolve date expressions deterministically + let due_ref = parsed["dueRef"].as_str().unwrap_or("").trim(); + let scheduled_ref = parsed["scheduledRef"].as_str().unwrap_or("").trim(); + let defer_ref = parsed["deferUntilRef"].as_str().unwrap_or("").trim(); - // Match project name to ID (case-insensitive exact match) + let due = super::ai_resolve::resolve_date_expression(due_ref, today); + let scheduled = super::ai_resolve::resolve_date_expression(scheduled_ref, today); + let defer_until = super::ai_resolve::resolve_date_expression(defer_ref, today); + + // Match project/area names with fuzzy (substring) matching let project_name = parsed["project"].as_str().unwrap_or("").trim(); - let project_id = match_project_name_to_id(project_name, projects); + let project_id = super::ai_resolve::match_project_fuzzy(project_name, projects); - // Match area name to ID (case-insensitive exact match) let area_name = parsed["area"].as_str().unwrap_or("").trim(); - let area_id = match_name_to_id(area_name, areas); + let area_id = super::ai_resolve::match_area_fuzzy(area_name, areas); Ok(ParsedQuickEntry { title, @@ -228,22 +234,6 @@ fn parse_ai_response( } } -/// Validate a date string is in YYYY-MM-DD format and return Some, or None if empty/invalid. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] -fn non_empty_date(s: Option<&str>) -> Option { - let s = s?.trim(); - if s.is_empty() { - return None; - } - // Validate YYYY-MM-DD format - if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok() { - Some(s.to_string()) - } else { - log::warn!("AI returned invalid date format: {s}"); - None - } -} - /// Check if two strings are essentially the same (ignoring case, trailing punctuation, whitespace). /// Used to avoid duplicating content when the AI parrots back the input. #[cfg(all(target_os = "macos", target_arch = "aarch64"))] @@ -259,30 +249,6 @@ fn is_essentially_same(a: &str, b: &str) -> bool { normalize(a) == normalize(b) } -/// Case-insensitive exact match of a project name to its ID. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] -fn match_project_name_to_id(name: &str, projects: &[ProjectContext]) -> Option { - if name.is_empty() { - return None; - } - projects - .iter() - .find(|p| p.name.eq_ignore_ascii_case(name)) - .map(|p| p.id.clone()) -} - -/// Case-insensitive exact match of a name to an ID from a list of name/ID pairs. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] -fn match_name_to_id(name: &str, pairs: &[NameIdPair]) -> Option { - if name.is_empty() { - return None; - } - pairs - .iter() - .find(|p| p.name.eq_ignore_ascii_case(name)) - .map(|p| p.id.clone()) -} - // ============================================================================= // Keyword-Based Status Detection // ============================================================================= @@ -485,7 +451,8 @@ mod eval { let response = crate::apple_intelligence::process_text(&system_prompt, input, 0) .expect("Apple Intelligence call failed"); - let mut result = parse_ai_response(&response, input, &projects, &areas) + let eval_today = chrono::NaiveDate::parse_from_str(EVAL_DATE, "%Y-%m-%d").unwrap(); + let mut result = parse_ai_response(&response, input, &projects, &areas, eval_today) .expect("Response parsing failed"); // Apply keyword detection (same as production code path) diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index 2470ee12..23ea60c0 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -57,43 +57,34 @@ Empty string if no project is mentioned by name. area: Set only if the input explicitly names an area from the list above. \ Empty string if no area is mentioned by name. -due: Set only if the input contains deadline language ('due by', 'deadline', \ -'must be done by', 'no later than'). YYYY-MM-DD format. Empty string otherwise. +dueRef: If the input contains deadline language ('due by', 'deadline', 'must be done by', \ +'no later than', 'by [date]'), extract the date reference exactly as stated. \ +Examples: 'Friday', 'April 15th', 'end of March', 'end of next week'. \ +Empty string if no deadline is mentioned. -scheduled: Set only if the input specifies when to do the task ('tomorrow', \ -'on Monday', 'this Friday', 'schedule for next week'). YYYY-MM-DD format. \ -Empty string otherwise. Vague time references ('for the week', 'soon') are \ -NOT scheduled dates — use empty string. +scheduledRef: If the input says when to do the task, extract the date reference exactly \ +as stated. Examples: 'today', 'tomorrow', 'Monday', 'this Friday', 'next week'. \ +Empty string if no timing is mentioned. Vague references ('for the week', 'soon') \ +are NOT scheduled dates — use empty string. -deferUntil: Set only if the input explicitly mentions deferring ('not until', \ -'defer until', 'start after'). This is rare. YYYY-MM-DD format. \ -Empty string otherwise."; +deferUntilRef: If the input explicitly mentions deferring, extract the date reference. \ +Examples: 'after Monday', 'not until April'. This is rare. Empty string otherwise."; /// Build few-shot examples. These are the highest-impact technique for small models. -fn build_examples_block(today: &str) -> String { - // Compute tomorrow for the example - let tomorrow = chrono::NaiveDate::parse_from_str(today, "%Y-%m-%d") - .ok() - .and_then(|d| d.succ_opt()) - .map(|d| d.format("%Y-%m-%d").to_string()) - .unwrap_or_else(|| "tomorrow".to_string()); +/// Examples use raw date expressions (not YYYY-MM-DD) — Rust resolves them later. +fn build_examples_block(_today: &str) -> String { + "\ +Examples: - format!( - "Examples:\n\ - \n\ - Input: \"Buy groceries for the week\"\n\ - Output: {{\"title\":\"Buy groceries\",\"body\":\"\",\ - \"due\":\"\",\"scheduled\":\"\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ - \n\ - Input: \"Call the dentist tomorrow about that crown\"\n\ - Output: {{\"title\":\"Call dentist about crown\",\"body\":\"\",\ - \"due\":\"\",\"scheduled\":\"{tomorrow}\",\"deferUntil\":\"\",\"project\":\"\",\"area\":\"\"}}\n\ - \n\ - Input: \"Finish the Newsletter Setup landing page by end of March\"\n\ - Output: {{\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\ - \"due\":\"2026-03-31\",\"scheduled\":\"\",\"deferUntil\":\"\",\ - \"project\":\"Newsletter Setup\",\"area\":\"\"}}" - ) +Input: \"Buy groceries for the week\" +Output: {\"title\":\"Buy groceries\",\"body\":\"\",\"dueRef\":\"\",\"scheduledRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} + +Input: \"Call the dentist tomorrow about that crown\" +Output: {\"title\":\"Call dentist about crown\",\"body\":\"\",\"dueRef\":\"\",\"scheduledRef\":\"tomorrow\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} + +Input: \"Finish the Newsletter Setup landing page by end of March\" +Output: {\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\"dueRef\":\"end of March\",\"scheduledRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"Newsletter Setup\",\"area\":\"\"}" + .to_string() } /// Build the structured context block showing areas and their projects. diff --git a/tdn-desktop/src-tauri/src/commands/ai_resolve.rs b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs new file mode 100644 index 00000000..b2e1ea6d --- /dev/null +++ b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs @@ -0,0 +1,458 @@ +//! Deterministic resolution of date expressions and fuzzy project/area matching. +//! +//! The LLM extracts raw date references ("tomorrow", "next Monday", "end of March") +//! and project/area name strings. This module resolves them to concrete values: +//! - Date expressions → YYYY-MM-DD strings via the `fuzzydate` crate +//! - Project/area names → matched IDs via case-insensitive substring matching + +use chrono::NaiveDate; + +use super::ai::{NameIdPair, ProjectContext}; + +// ============================================================================= +// Date Resolution +// ============================================================================= + +/// Resolve a natural language date expression to a YYYY-MM-DD string. +/// +/// Uses `fuzzydate::parse_relative_to` for natural language parsing, with +/// preprocessing to handle patterns fuzzydate doesn't support natively. +/// Returns None if the expression is empty or unparseable. +/// +/// Examples: +/// - "tomorrow" → "2026-03-27" (relative to 2026-03-26) +/// - "next Monday" → "2026-03-30" +/// - "April 15th" → "2026-04-15" +/// - "end of March" → "2026-03-31" +/// - "in 3 weeks" → 3 weeks from today +pub fn resolve_date_expression(expr: &str, today: NaiveDate) -> Option { + let trimmed = expr.trim(); + if trimmed.is_empty() { + return None; + } + + // If it's already a YYYY-MM-DD date (LLM might still output these), use it directly + if NaiveDate::parse_from_str(trimmed, "%Y-%m-%d").is_ok() { + return Some(trimmed.to_string()); + } + + // Try custom handlers first for patterns fuzzydate doesn't support + if let Some(date) = resolve_end_of_month(trimmed, today) { + return Some(date.format("%Y-%m-%d").to_string()); + } + if let Some(date) = resolve_in_n_weeks(trimmed, today) { + return Some(date.format("%Y-%m-%d").to_string()); + } + + // Preprocess: strip ordinal suffixes and "on" prefix that fuzzydate doesn't handle + let cleaned = preprocess_date_expr(trimmed); + + // Use fuzzydate to parse the expression relative to today + let reference = today.and_hms_opt(12, 0, 0)?; // noon to avoid edge cases + match fuzzydate::parse_relative_to(&cleaned, reference) { + Ok(parsed) => Some(parsed.date().format("%Y-%m-%d").to_string()), + Err(_) => { + log::debug!("Could not parse date expression: {trimmed:?}"); + None + } + } +} + +/// Preprocess a date expression to handle patterns fuzzydate doesn't support: +/// - Strip ordinal suffixes: "15th" → "15", "1st" → "1", "2nd" → "2", "3rd" → "3" +/// - Strip leading "on": "on Thursday" → "Thursday" +/// - Strip leading "by": "by Friday" → "Friday" +fn preprocess_date_expr(expr: &str) -> String { + let mut s = expr.to_string(); + + // Strip leading "on " or "by " + for prefix in &["on ", "by "] { + if let Some(rest) = s.to_lowercase().strip_prefix(prefix) { + s = expr[prefix.len()..].to_string(); + let _ = rest; // suppress unused warning + } + } + + // Strip ordinal suffixes from numbers: "15th" → "15" + let ordinal_re = regex::Regex::new(r"(\d+)(st|nd|rd|th)\b").unwrap(); + s = ordinal_re.replace_all(&s, "$1").to_string(); + + s +} + +/// Handle "end of [month]" / "end of the month" expressions. +fn resolve_end_of_month(expr: &str, today: NaiveDate) -> Option { + let lower = expr.to_lowercase(); + + if lower == "end of the month" || lower == "end of month" { + // Last day of the current month + return last_day_of_month(today.year(), today.month()); + } + + // "end of March", "end of April", etc. + let months = [ + ("january", 1), ("february", 2), ("march", 3), ("april", 4), + ("may", 5), ("june", 6), ("july", 7), ("august", 8), + ("september", 9), ("october", 10), ("november", 11), ("december", 12), + ]; + + if let Some(rest) = lower.strip_prefix("end of ") { + let rest = rest.trim(); + for (name, num) in &months { + if rest == *name { + let year = if *num < today.month() { + today.year() + 1 // month already passed → next year + } else { + today.year() + }; + return last_day_of_month(year, *num); + } + } + } + + None +} + +/// Handle "in N weeks" / "in N days" expressions. +fn resolve_in_n_weeks(expr: &str, today: NaiveDate) -> Option { + let lower = expr.to_lowercase(); + + // Word-to-number mapping for common cases + let word_to_num = |w: &str| -> Option { + match w { + "one" | "a" => Some(1), + "two" => Some(2), + "three" => Some(3), + "four" => Some(4), + "five" => Some(5), + "six" => Some(6), + "seven" => Some(7), + "eight" => Some(8), + _ => w.parse().ok(), + } + }; + + // "in N weeks" / "in N week" + if let Some(rest) = lower.strip_prefix("in ") { + let parts: Vec<&str> = rest.trim().split_whitespace().collect(); + if parts.len() == 2 { + if let Some(n) = word_to_num(parts[0]) { + if parts[1].starts_with("week") { + return Some(today + chrono::Duration::weeks(n)); + } + if parts[1].starts_with("day") { + return Some(today + chrono::Duration::days(n)); + } + if parts[1].starts_with("month") { + // Approximate: 30 days per month + return Some(today + chrono::Duration::days(n * 30)); + } + } + } + } + + None +} + +/// Get the last day of a given month. +fn last_day_of_month(year: i32, month: u32) -> Option { + if month == 12 { + NaiveDate::from_ymd_opt(year + 1, 1, 1) + .and_then(|d| d.pred_opt()) + } else { + NaiveDate::from_ymd_opt(year, month + 1, 1) + .and_then(|d| d.pred_opt()) + } +} + +use chrono::Datelike; + +// ============================================================================= +// Fuzzy Project/Area Matching +// ============================================================================= + +/// Match a project name from AI output against the available projects. +/// Uses case-insensitive substring matching with a minimum length guard. +/// Returns the project ID if matched, None otherwise. +pub fn match_project_fuzzy(name: &str, projects: &[ProjectContext]) -> Option { + let query = name.trim(); + if query.is_empty() { + return None; + } + + // Try exact match first (case-insensitive) + if let Some(p) = projects.iter().find(|p| p.name.eq_ignore_ascii_case(query)) { + return Some(p.id.clone()); + } + + // Substring match: query is a substring of a project name, or vice versa + // Minimum 3 characters to prevent spurious short matches + if query.len() >= 3 { + let lower_query = query.to_lowercase(); + // Check if query is contained in any project name + if let Some(p) = projects + .iter() + .find(|p| p.name.to_lowercase().contains(&lower_query)) + { + return Some(p.id.clone()); + } + // Check if any project name is contained in the query + if let Some(p) = projects + .iter() + .find(|p| lower_query.contains(&p.name.to_lowercase())) + { + return Some(p.id.clone()); + } + } + + None +} + +/// Match an area name from AI output against the available areas. +/// Uses case-insensitive substring matching with a minimum length guard. +/// Returns the area ID if matched, None otherwise. +pub fn match_area_fuzzy(name: &str, areas: &[NameIdPair]) -> Option { + let query = name.trim(); + if query.is_empty() { + return None; + } + + // Try exact match first (case-insensitive) + if let Some(a) = areas.iter().find(|a| a.name.eq_ignore_ascii_case(query)) { + return Some(a.id.clone()); + } + + // Substring match with minimum length guard + if query.len() >= 3 { + let lower_query = query.to_lowercase(); + if let Some(a) = areas + .iter() + .find(|a| a.name.to_lowercase().contains(&lower_query)) + { + return Some(a.id.clone()); + } + if let Some(a) = areas + .iter() + .find(|a| lower_query.contains(&a.name.to_lowercase())) + { + return Some(a.id.clone()); + } + } + + None +} + +// ============================================================================= +// Tests +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + fn test_date() -> NaiveDate { + // Wednesday, 2026-03-25 (matches eval harness) + NaiveDate::from_ymd_opt(2026, 3, 25).unwrap() + } + + // ── Date resolution tests ──────────────────────────────────────────── + + #[test] + fn date_empty_returns_none() { + assert_eq!(resolve_date_expression("", test_date()), None); + assert_eq!(resolve_date_expression(" ", test_date()), None); + } + + #[test] + fn date_today() { + assert_eq!( + resolve_date_expression("today", test_date()), + Some("2026-03-25".into()) + ); + } + + #[test] + fn date_tomorrow() { + assert_eq!( + resolve_date_expression("tomorrow", test_date()), + Some("2026-03-26".into()) + ); + } + + #[test] + fn date_this_friday() { + // Wednesday March 25 → this Friday = March 27 + assert_eq!( + resolve_date_expression("this Friday", test_date()), + Some("2026-03-27".into()) + ); + } + + #[test] + fn date_next_monday() { + // Wednesday March 25 → next Monday = March 30 + assert_eq!( + resolve_date_expression("next Monday", test_date()), + Some("2026-03-30".into()) + ); + } + + #[test] + fn date_specific_month_day() { + assert_eq!( + resolve_date_expression("April 15th", test_date()), + Some("2026-04-15".into()) + ); + } + + #[test] + fn date_end_of_march() { + assert_eq!( + resolve_date_expression("end of March", test_date()), + Some("2026-03-31".into()) + ); + } + + #[test] + fn date_passthrough_iso() { + // Already YYYY-MM-DD → pass through + assert_eq!( + resolve_date_expression("2026-06-01", test_date()), + Some("2026-06-01".into()) + ); + } + + #[test] + fn date_nonsense_returns_none() { + assert_eq!(resolve_date_expression("banana", test_date()), None); + } + + #[test] + #[ignore] + fn date_explore_fuzzydate() { + let today = test_date(); + let cases = vec![ + "April 15", "April 15th", "15 April", "15th April", + "March 31", "March 31st", "end of March", "end of the month", + "Friday", "this Friday", "next Friday", + "in 3 weeks", "in two weeks", "in 2 weeks", + "Thursday", "on Thursday", + ]; + for c in cases { + let result = resolve_date_expression(c, today); + println!(" {:30} → {:?}", c, result); + } + } + + // ── Project fuzzy matching tests ───────────────────────────────────── + + fn test_projects() -> Vec { + vec![ + ProjectContext { + id: "p-japan".into(), + name: "Japan Trip 2025".into(), + area_name: Some("Travel".into()), + }, + ProjectContext { + id: "p-cli".into(), + name: "Open Source CLI Tool".into(), + area_name: Some("Coding".into()), + }, + ProjectContext { + id: "p-blog".into(), + name: "Tech Blog Relaunch".into(), + area_name: Some("Writing".into()), + }, + ] + } + + #[test] + fn project_exact_match() { + let projects = test_projects(); + assert_eq!( + match_project_fuzzy("Japan Trip 2025", &projects), + Some("p-japan".into()) + ); + } + + #[test] + fn project_exact_case_insensitive() { + let projects = test_projects(); + assert_eq!( + match_project_fuzzy("japan trip 2025", &projects), + Some("p-japan".into()) + ); + } + + #[test] + fn project_substring_partial_name() { + let projects = test_projects(); + // "Japan Trip" is a substring of "Japan Trip 2025" + assert_eq!( + match_project_fuzzy("Japan Trip", &projects), + Some("p-japan".into()) + ); + } + + #[test] + fn project_substring_middle() { + let projects = test_projects(); + assert_eq!( + match_project_fuzzy("CLI Tool", &projects), + Some("p-cli".into()) + ); + } + + #[test] + fn project_empty_returns_none() { + let projects = test_projects(); + assert_eq!(match_project_fuzzy("", &projects), None); + } + + #[test] + fn project_no_match() { + let projects = test_projects(); + assert_eq!(match_project_fuzzy("Nonexistent Project", &projects), None); + } + + #[test] + fn project_too_short_no_match() { + let projects = test_projects(); + // "Ja" is only 2 chars — below minimum, should not match + assert_eq!(match_project_fuzzy("Ja", &projects), None); + } + + // ── Area fuzzy matching tests ──────────────────────────────────────── + + fn test_areas() -> Vec { + vec![ + NameIdPair { id: "a-acme".into(), name: "Acme Corp".into() }, + NameIdPair { id: "a-finance".into(), name: "Finance".into() }, + NameIdPair { id: "a-home".into(), name: "Home".into() }, + ] + } + + #[test] + fn area_exact_match() { + let areas = test_areas(); + assert_eq!( + match_area_fuzzy("Acme Corp", &areas), + Some("a-acme".into()) + ); + } + + #[test] + fn area_substring_match() { + let areas = test_areas(); + assert_eq!( + match_area_fuzzy("Acme", &areas), + Some("a-acme".into()) + ); + } + + #[test] + fn area_no_match() { + let areas = test_areas(); + assert_eq!(match_area_fuzzy("Marketing", &areas), None); + } +} diff --git a/tdn-desktop/src-tauri/src/commands/mod.rs b/tdn-desktop/src-tauri/src/commands/mod.rs index 94e34c56..b9f831ef 100644 --- a/tdn-desktop/src-tauri/src/commands/mod.rs +++ b/tdn-desktop/src-tauri/src/commands/mod.rs @@ -5,6 +5,7 @@ pub mod ai; pub mod ai_prompts; +pub mod ai_resolve; pub mod config; pub mod notifications; pub mod preferences; diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 0f1980b7..5afb22ee 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -12,14 +12,14 @@ private struct ParsedTask: Sendable { @Guide(description: "Extra detail, or empty string") let body: String - @Guide(description: "YYYY-MM-DD or empty string") - let due: String + @Guide(description: "Date reference for deadline, or empty string") + let dueRef: String - @Guide(description: "YYYY-MM-DD or empty string") - let scheduled: String + @Guide(description: "Date reference for when to do this, or empty string") + let scheduledRef: String - @Guide(description: "YYYY-MM-DD or empty string") - let deferUntil: String + @Guide(description: "Date reference for deferral, or empty string") + let deferUntilRef: String @Guide(description: "Project name or empty string") let project: String @@ -55,9 +55,9 @@ private func parsedTaskToJSON(_ task: ParsedTask) -> String { let fields: [(String, String)] = [ ("title", task.title), ("body", task.body), - ("due", task.due), - ("scheduled", task.scheduled), - ("deferUntil", task.deferUntil), + ("dueRef", task.dueRef), + ("scheduledRef", task.scheduledRef), + ("deferUntilRef", task.deferUntilRef), ("project", task.project), ("area", task.area), ] From f4529deb0ae2711f7ad4d91fb4a44ba9fbd3414b Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 02:29:30 +0000 Subject: [PATCH 23/32] Update task doc and developer doc for Phase 8 completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task doc: marks Phase 8 complete with results (16/31 eval passing), documents what works and remaining failures, adds Phase 9 (prompt refinement) with clear next steps. Developer doc: updates architecture to reflect current state — date expression extraction, fuzzydate resolution, fuzzy project matching, deterministic status via keywords, auto-ready rules. Adds eval harness docs and unit test info. Updates known limitations. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 72 +++++----- .../docs/developer/apple-intelligence.md | 129 +++++++++++------- 2 files changed, 120 insertions(+), 81 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 4e277cb5..326bb221 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -105,7 +105,7 @@ Done. 31 test cases in `commands/ai.rs` covering simple inputs, project/area mat cd tdn-desktop/src-tauri && cargo test eval_ai --lib -- --ignored --nocapture ``` -Current baseline: **11/31 passing**. Most failures are date arithmetic and project matching — addressed in Phases 7 and 8. +Current baseline: **16/31 passing**. ### Phase 6: Auto-Ready on Quick Entry (non-AI, cherry-pickable) ✅ @@ -115,55 +115,59 @@ Done. `useEffect` in `QuickPaneApp.tsx` watches `[projectId, areaId, scheduled, Done. Status removed from `@Generable` struct (8→7 fields) and prompt. Status now determined by: - Keyword detection in Rust: `blocked` / `waiting on` / `waitingon` → blocked; `icebox` / `ice box` / `ice-box` → icebox; `in progress` / `in-progress` / `inprogress` → in-progress; everything else → inbox -- Auto-ready Rule 2 in frontend: if AI sets scheduled within 7 days and status is inbox → ready +- Auto-ready Rule 1 (all quick entry): `useEffect` promotes inbox → ready when (project OR area) AND (scheduled OR defer) are set +- Auto-ready Rule 2 (AI only): if scheduled within 7 days and status is inbox → ready - 8 unit tests for keyword detection in the normal test suite Also fixed few-shot contamination: replaced Q1 tax return example (which leaked "Gather all receipts first" into responses) with Newsletter Setup example. -### Phase 8: Deterministic Date and Project/Area Resolution +### Phase 8: Deterministic Date and Project/Area Resolution ✅ -Split the work into what the LLM is good at (language understanding, intent classification) and what deterministic code is good at (date arithmetic, fuzzy string matching). +Done. The LLM now extracts raw date expressions ("tomorrow", "next Monday", "end of March") instead of computing YYYY-MM-DD dates. Rust resolves them deterministically via the `fuzzydate` crate with custom handlers for patterns fuzzydate doesn't support natively. -**Date resolution:** +**Date resolution (`ai_resolve.rs`):** +- `fuzzydate::parse_relative_to()` handles: today, tomorrow, day names, "this/next [day]", "Month Day" format +- Custom handlers for: "end of [month]", "end of the month", "in N weeks/days", ordinal suffixes ("15th" → "15"), "on/by [day]" prefix stripping +- Falls back to None if unparseable — user sets date manually +- 19 unit tests (deterministic, normal test suite) -Change the `@Generable` struct so date fields capture the *raw reference and intent* rather than computed YYYY-MM-DD dates: +**Project/area matching (`ai_resolve.rs`):** +- Case-insensitive exact match first, then substring match (min 3 chars) +- "Japan Trip" now matches "Japan Trip 2025" via substring +- Bidirectional: checks if query is in name AND if name is in query -```swift -@Guide(description: "Raw date/time reference for scheduling intent, or empty string") -let scheduledRef: String // e.g. "tomorrow", "next Monday", "this Friday" +**What works well now:** +- Relative dates: "tomorrow" ✓, "this Friday" ✓, "next Monday" ✓ +- Absolute dates: "April 15th" ✓, "June 1st" ✓ +- End-of-month: "end of March" ✓, "end of the month" ✓ +- Deadline detection: "due by Friday" ✓, "deadline is June 1st" ✓ -@Guide(description: "Raw date/time reference for deadline intent, or empty string") -let dueRef: String // e.g. "by April 15th", "by end of next week" -``` - -The LLM's job becomes: (1) identify whether a date reference exists, (2) classify it as scheduled vs. due vs. defer intent, (3) extract the reference text. Crucially, the LLM still decides whether "this Friday" is a scheduling intent for *this task* vs. just contextual information about something else — that's a language understanding judgment the LLM should make. - -Rust then resolves the expression to a date deterministically. Options for date parsing in Rust: -- `chrono` with hand-written pattern matching for common expressions -- A crate like `dateparser` or `chrono-english` (evaluate coverage) -- Simple keyword-based resolution ("tomorrow" → +1 day, "next Monday" → find next Monday, "April 15th" → parse month+day) - -Start with a small set of common patterns and fall back to empty if unparseable. The eval harness will show which patterns are most needed. +**Remaining failures (15/31):** +- LLM sometimes returns empty for date refs despite clear language ("this afternoon", "tomorrow morning", "end of next week") — the model inconsistently extracts expressions +- LLM sometimes returns empty for project names even when explicitly mentioned — fuzzy matching helps when the LLM returns a name, but can't help when it returns empty +- LLM fills in parent area when only project should be set (hallucination) +- These are prompt refinement problems, not resolution problems -**Project/area matching:** +### Phase 9: Prompt Refinement -Add fuzzy matching in Rust alongside the existing exact match. "Japan Trip" should match "Japan Trip 2025". Options: -- Case-insensitive substring matching (simplest) -- Levenshtein distance with a threshold -- Token overlap (split on spaces, check how many words match) +Iterate on the system prompt and few-shot examples to improve the LLM's extraction reliability. The eval harness (`cargo test eval_ai --lib -- --ignored --nocapture` from `src-tauri/`) makes this a fast feedback loop — edit `ai_prompts.rs`, rebuild, run eval, compare results. -Start with case-insensitive substring (covers the "Japan Trip" case) and evaluate via the harness. +Key areas to improve: +- LLM not extracting date refs when they're present ("this afternoon" → empty, "tomorrow morning" → empty) +- LLM not returning project names even when explicitly mentioned in input +- LLM hallucinating area when only project is referenced (fills in parent area) +- Consider whether additional few-shot examples showing date ref extraction would help -### Phase 9: Polish and Edge Cases +### Phase 10: Polish and Edge Cases - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) - Very long input handling (context window limits?) -### Phase 10: Docs +### Phase 11: Docs -- Update develper quick-entry pane docs as needed -- Update userguide page on Quick Entry pane to mention - A) Auto-setting of status to Ready when (project || area ) && (scheduled || defer-until) are set. - B) Basic explanation of how the sparkle button works and what it's for, and when it's available. -- Update apple-intelligence.md developer doc as needed so it's accurate about how things currently work. Include a brief mention of how to use the eval test to iterate on prompts etc. +- Update developer quick-entry pane docs as needed +- Update userguide page on Quick Entry pane to mention: + A) Auto-setting of status to Ready when (project || area) && (scheduled || defer-until) are set + B) Basic explanation of how the sparkle button works and what it's for, and when it's available +- Update apple-intelligence.md developer doc so it's accurate about how things currently work, including the eval harness diff --git a/tdn-desktop/docs/developer/apple-intelligence.md b/tdn-desktop/docs/developer/apple-intelligence.md index e704ab40..07ea7417 100644 --- a/tdn-desktop/docs/developer/apple-intelligence.md +++ b/tdn-desktop/docs/developer/apple-intelligence.md @@ -42,13 +42,15 @@ The system prompt and user text are converted to C strings and passed through th The Swift function creates a `LanguageModelSession` with the system prompt as `instructions` (which the model is trained to prioritise over user input). It then calls `session.respond(to: userText, generating: ParsedTask.self)`. -`ParsedTask` is a `@Generable` struct — this is Apple's constrained decoding system. The model's token generation is structurally constrained to produce valid output matching the struct's fields. The `ParsedStatus` enum means the model literally cannot output an invalid status value. +`ParsedTask` is a `@Generable` struct — this is Apple's constrained decoding system. The model's token generation is structurally constrained to produce valid output matching the struct's fields. The struct has 7 fields: title, body, dueRef, scheduledRef, deferUntilRef, project, area. Note: status is NOT included — it's handled deterministically (see step 6). + +The date fields are `*Ref` fields — the model extracts raw date expressions ("tomorrow", "next Monday", "end of March") rather than computing YYYY-MM-DD dates. Date arithmetic is done in Rust. If `@Generable` succeeds (the normal path), the typed `ParsedTask` struct is manually serialized to a JSON string. If it fails (rare), the function falls back to a plain `session.respond()` call — the model typically returns a JSON code block in this case. Because the Swift call is `async` but the C FFI is synchronous, a `DispatchSemaphore` bridges the two. A detached task runs the inference, signals the semaphore on completion, and the calling thread blocks until it's done. This takes ~2-3 seconds on Apple Silicon. -### 6. Rust parses and validates the response +### 6. Rust parses, resolves, and validates the response Back in Rust, `parse_ai_response()` processes the JSON string through several stages: @@ -58,15 +60,23 @@ Back in Rust, `parse_ai_response()` processes the JSON string through several st **Body logic:** If the model transformed the title (it differs from the original input), the original text is preserved in the body — this ensures no context from dictation is lost. If the model also generated body text, it's only appended if it contains genuinely new information. A normalisation check (`is_essentially_same`) catches cases where the model just parrots the input back with minor punctuation changes. -**Date validation:** Each date string is parsed with `chrono::NaiveDate`. Valid YYYY-MM-DD is kept. Empty strings become `None`. Anything else (malformed dates, random text) is silently discarded. +**Date resolution (`ai_resolve.rs`):** The model returns raw date expressions (e.g. "tomorrow", "next Monday", "end of March"). Rust resolves these deterministically using the `fuzzydate` crate with custom handlers for patterns it doesn't support natively (ordinal suffixes, "end of [month]", "in N weeks", "on/by [day]" prefixes). Invalid or unparseable expressions become `None`. + +**Project/area matching (`ai_resolve.rs`):** The model returns a name string. Rust first tries case-insensitive exact match, then falls back to case-insensitive substring match (minimum 3 characters). This handles the common case where the model returns a truncated name ("Japan Trip" matches "Japan Trip 2025"). No match → field is left empty. + +**Status determination:** Status is NOT set by the LLM. Instead: +1. `detect_status_from_keywords()` scans the original input text for explicit status phrases: `blocked` / `waiting on` → blocked, `icebox` / `ice box` → icebox, `in progress` / `in-progress` → in-progress. Everything else → inbox. +2. The frontend applies auto-ready rules after (see step 7). -**Project/area matching:** The model returns a project or area name as a string. Rust does case-insensitive exact match against the provided list of names. A match returns the entity's hash ID (which the frontend uses for the dropdown selectors). No match → the field is left empty for the user to set manually. This is a deliberate safety net — the model sometimes hallucinates project/area names that don't exist, and the exact matching silently drops them. +### 7. Frontend populates the form and applies auto-ready rules -**Status validation:** Must be one of `inbox`, `icebox`, `ready`, `in-progress`, `blocked`. Anything else defaults to `inbox`. +The React handler receives the `ParsedQuickEntry` result and sets each piece of form state: title, body (with the body section auto-expanding if populated), status, dates, project ID, and area ID. The UI updates immediately. -### 7. Frontend populates the form +Two auto-ready rules then apply: -The React handler receives the `ParsedQuickEntry` result and sets each piece of form state: title, body (with the body section auto-expanding if populated), status, dates, project ID, and area ID. The UI updates immediately — the user sees fields filled in and can adjust anything before saving. +**Rule 1 (all quick entry, not just AI):** A `useEffect` watches projectId, areaId, scheduled, and deferUntil. If `(project OR area) AND (scheduled OR defer)` are set and status is `inbox`, it auto-promotes to `ready`. A task with both a project/area and a when-to-do-it date has been "processed" — it doesn't need the inbox. + +**Rule 2 (AI only):** After AI processing, if the scheduled date is within 7 days of today and status is still `inbox` (keyword detection didn't override), promote to `ready`. Catches "call Dave tomorrow" style tasks. ### 8. User reviews and saves @@ -100,16 +110,17 @@ Swift: processTextWithSystemPrompt() │ serializes to JSON, strips invisible Unicode chars │ ▼ -Rust: parse_ai_response() +Rust: parse_ai_response() + ai_resolve │ strips markdown code fences (fallback path) │ parses JSON - │ validates dates (YYYY-MM-DD or discard) - │ matches project/area names → IDs (case-insensitive exact) + │ resolves date expressions → YYYY-MM-DD (fuzzydate + custom) + │ matches project/area names → IDs (substring fuzzy match) │ applies body logic (preserve original text, deduplicate) - │ validates status against known values + │ detects status from keywords (not LLM) │ ▼ React: populates form fields + applies auto-ready rules (Rule 1 + Rule 2) user reviews and saves normally ``` @@ -119,7 +130,7 @@ React: populates form fields Three files in `src-tauri/swift/`: -- `apple_intelligence.swift` — The real implementation. Contains the `@Generable ParsedTask` struct with `ParsedStatus` enum, the `LanguageModelSession` call, JSON serialization, and availability check. +- `apple_intelligence.swift` — The real implementation. Contains the `@Generable ParsedTask` struct (7 fields, no status), the `LanguageModelSession` call, JSON serialization, and availability check. - `apple_intelligence_stub.swift` — Compiled instead when the build SDK lacks FoundationModels. All functions return errors. - `apple_intelligence_bridge.h` — C header defining the `AppleLLMResponse` struct and function signatures shared between Swift and Rust. @@ -141,7 +152,7 @@ The Swift code bridges async/await to synchronous C using `DispatchSemaphore` + - `check_apple_intelligence_available()` → `bool` - `process_quick_entry_text(text, projects, areas)` → `Result` -The command builds the system prompt, calls the FFI, parses the response, validates fields, and resolves project/area names to IDs. +The command builds the system prompt, calls the FFI, parses the response, resolves dates and project/area names, and applies keyword-based status detection. ### Prompt Templates @@ -149,48 +160,38 @@ The command builds the system prompt, calls the FFI, parses the response, valida - `build_system_prompt()` — Assembles the complete prompt from role text, context, field instructions, and few-shot examples - `build_context_block()` — Formats areas and their projects as a structured list -- `build_examples_block()` — Generates few-shot input→output pairs (dynamically computes "tomorrow" from today's date) +- `build_examples_block()` — Few-shot input→output pairs showing raw date expression extraction + +### Date Resolution and Fuzzy Matching + +`src/commands/ai_resolve.rs` handles the deterministic parts of processing: + +- `resolve_date_expression(expr, today)` — Resolves natural language dates ("tomorrow", "next Monday", "end of March") to YYYY-MM-DD strings using the `fuzzydate` crate with custom handlers for ordinal suffixes, "end of [month]", "in N weeks", and "on/by" prefixes +- `match_project_fuzzy(name, projects)` — Case-insensitive exact match, then substring match (min 3 chars) +- `match_area_fuzzy(name, areas)` — Same approach for areas ## The @Generable Struct ```swift @Generable struct ParsedTask: Sendable { - let title: String // concise task title - let body: String // extra detail, or empty string - let status: ParsedStatus // constrained enum - let due: String // YYYY-MM-DD or empty string - let scheduled: String // YYYY-MM-DD or empty string - let deferUntil: String // YYYY-MM-DD or empty string - let project: String // project name or empty string - let area: String // area name or empty string -} - -@Generable -enum ParsedStatus: Sendable { - case inbox, icebox, ready, inProgress, blocked + let title: String // concise task title + let body: String // extra detail, or empty string + let dueRef: String // raw deadline expression, or empty string + let scheduledRef: String // raw scheduling expression, or empty string + let deferUntilRef: String // raw deferral expression, or empty string + let project: String // project name or empty string + let area: String // area name or empty string } ``` -`@Generable` uses constrained decoding — the model's token generation is structurally constrained to produce valid output matching the struct. The `ParsedStatus` enum means the model literally cannot output an invalid status. - -Each field has a `@Guide(description:)` annotation providing a short hint. The system prompt carries the detailed decision-making instructions. - -Properties generate in declaration order. Later properties can be influenced by earlier ones. Title is first (most important), optional fields are last. - -## Response Parsing Pipeline - -After receiving the JSON from Swift, Rust applies several transformations: +`@Generable` uses constrained decoding — the model's token generation is structurally constrained to produce valid output matching the struct. -**Code fence stripping:** If `@Generable` fails and the fallback produces a markdown-wrapped JSON block (`` ```json...``` ``), the fences are stripped before parsing. +Note: **status is not in the struct** — it was removed because the model was inconsistent with it. Status is now determined by keyword detection in Rust and auto-ready rules in the frontend. -**Body logic:** The raw dictated text is preserved in the body when the title was transformed (title != original input). If the AI also generated body text, it's appended only if it adds genuinely new content — checked via `is_essentially_same()` which normalises case and trailing punctuation to avoid duplication. +Date fields are `*Ref` fields containing raw expressions ("tomorrow", "next Monday", "end of March") rather than YYYY-MM-DD dates. The model is good at text extraction but bad at date arithmetic, so date computation is done deterministically in Rust. -**Date validation:** Each date string is parsed with `chrono::NaiveDate`. Valid YYYY-MM-DD is kept, anything else is silently discarded. - -**Project/area matching:** The model returns a name string. Rust does case-insensitive exact match against the provided list. No match → field is left empty. (Fuzzy matching is a planned improvement.) - -**Status validation:** Must be one of `inbox`, `icebox`, `ready`, `in-progress`, `blocked`. Anything else defaults to `inbox`. +Each field has a `@Guide(description:)` annotation providing a short hint. The system prompt carries the detailed decision-making instructions. Properties generate in declaration order. ## Frontend Integration @@ -200,6 +201,14 @@ The `Cmd+Shift+A` shortcut is registered in `useQuickPaneKeyboard` only when `on On successful processing, the handler populates all form state setters. The body section auto-expands if body content was generated. +### Auto-Ready Rules + +Two rules automatically promote `inbox` → `ready`: + +**Rule 1 (all quick entry):** A `useEffect` watches `[projectId, areaId, scheduled, deferUntil]`. When `(project OR area) AND (scheduled OR defer-until)` are set and status is `inbox`, it promotes to `ready`. This applies to manual entry too — it's not AI-specific. + +**Rule 2 (AI only):** After AI processing, if the scheduled date is within 7 days of today and status is still `inbox`, promote to `ready`. Catches "call Dave tomorrow" style tasks. + ## Logging All AI processing is logged at INFO level with a clear delimiter: @@ -221,6 +230,8 @@ The full system prompt is logged at DEBUG level. To see it, check the Tauri dev ## Iterating on Prompts +### Manual testing + 1. Edit `src/commands/ai_prompts.rs` — all prompt text is here 2. Restart the dev server (`bun run tauri:dev`) 3. Test with the quick pane @@ -229,11 +240,35 @@ The full system prompt is logged at DEBUG level. To see it, check the Tauri dev The few-shot examples in `build_examples_block()` are the highest-impact thing to change. Keep examples distinct from likely real inputs to avoid contamination (the model copying example content into real responses). +### Evaluation harness + +A faster feedback loop for prompt iteration. 31 test cases covering simple inputs, project/area matching, date extraction, status detection, complex dictation, and hallucination traps. + +``` +cd tdn-desktop/src-tauri && cargo test eval_ai --lib -- --ignored --nocapture +``` + +Takes ~50 seconds (31 LLM calls). Prints a per-case pass/fail summary with raw values and failure details. Uses fixed context (hardcoded projects, areas, date=2026-03-25 Wednesday) for reproducibility. + +The harness does NOT assert on failure — it's a measurement tool, not a hard test. Some failures are expected while iterating on prompts. + +Current baseline: **16/31 passing**. + +### Unit tests + +Deterministic logic (date resolution, fuzzy matching, keyword status detection) has standard unit tests that run in the normal test suite: + +``` +cd tdn-desktop/src-tauri && cargo test --lib +``` + +Currently 263+ tests including 19 for date resolution/fuzzy matching and 8 for keyword status detection. + ## Known Limitations -- **Date arithmetic is unreliable.** The 3B model frequently gets relative date calculations wrong ("this Friday" off by days, "end of the month" wrong month). Planned fix: have the LLM extract raw date expressions and resolve them deterministically in Rust. -- **Project name matching is exact only.** "Japan Trip" won't match "Japan Trip 2025". Planned fix: fuzzy matching in Rust. -- **Few-shot contamination.** If an input is similar to a few-shot example, the model may copy fields from the example rather than generating from the actual input. -- **Body generation for complex inputs.** The model sometimes fabricates body content not present in the input. +- **LLM sometimes misses date expressions.** The model inconsistently extracts date references — "buy milk tomorrow" sometimes returns `scheduledRef: "tomorrow"`, sometimes returns empty. When it does extract, deterministic resolution handles it correctly. +- **LLM sometimes misses project names.** Even when a project name is explicitly in the input, the model may return empty. Fuzzy matching helps when the model returns a close-but-not-exact name, but can't help when it returns nothing. +- **Area hallucination.** When the model correctly identifies a project, it sometimes also fills in the parent area. This is harmless (both get set) but unexpected. +- **Body fabrication.** The model sometimes generates body content not present in the input. The `is_essentially_same` check catches parroting but not fabrication. - **`@Guide(Regex{...})` is incompatible with `.default` model.** Regex constraints cause `@Generable` to fail, falling back to plain text. Use `@Guide(description:)` only. - **`contentTagging` adapter is wrong for this task.** It produces topic tags instead of following structured extraction instructions. Use `.default`. From 274e2436ab65ab272d866f12f6f057e411ff7048 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 03:05:24 +0000 Subject: [PATCH 24/32] Tigheten Apple Intelligence prompts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Separate area and project lists to reduce area hallucination - Add "leave area empty when project is set" guidance - Add field definitions explaining what each field represents - Add 4th few-shot example for "this afternoon" → scheduledRef: "today" - Reorder @Generable: scheduledRef before dueRef (more common first) - Trim verbose instructions for smaller prompt footprint Eval improvement: ~15.5/31 → ~17.5/31 average across multiple runs. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src-tauri/src/commands/ai_prompts.rs | 114 ++++++++---------- .../src-tauri/swift/apple_intelligence.swift | 12 +- 2 files changed, 53 insertions(+), 73 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index 23ea60c0..ace4988b 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -19,7 +19,7 @@ pub fn build_system_prompt( day_of_week: &str, ) -> String { let context_block = build_context_block(projects_with_areas, areas); - let examples_block = build_examples_block(today); + let examples_block = build_examples_block(); format!( "{ROLE}\n\ @@ -28,7 +28,7 @@ pub fn build_system_prompt( \n\ {context_block}\n\ \n\ - {FIELD_INSTRUCTIONS}\n\ + {FIELD_DEFINITIONS}\n\ \n\ {examples_block}" ) @@ -39,95 +39,75 @@ pub fn build_system_prompt( // ───────────────────────────────────────────────────────────────────────────── const ROLE: &str = "\ -You are a task field extractor. Given free-form text, populate structured task fields. \ +Extract structured task fields from free-form text. \ Return empty string for any field where the input provides no clear value. \ Empty string is always the safe choice."; -const FIELD_INSTRUCTIONS: &str = "\ -Field instructions: +const FIELD_DEFINITIONS: &str = "\ +Fields: -title: Rewrite the input as a concise, actionable task title. +title: A concise, actionable task title. Rewrite the input to be short and scannable. -body: Include only if the input has meaningful detail beyond what the title captures. \ -Otherwise empty string. +body: Extra detail from the input beyond the title. Empty string if the input is simple. -project: Set only if the input explicitly names a project from the list above. \ -Empty string if no project is mentioned by name. +project: A project name from the projects list above. \ +Set ONLY if the input explicitly names a project. Empty string otherwise. -area: Set only if the input explicitly names an area from the list above. \ -Empty string if no area is mentioned by name. +area: An area name from the areas list above. \ +Set ONLY if the input explicitly names an area AND no project was matched. \ +When a project is set, leave area as empty string — the app handles the relationship. -dueRef: If the input contains deadline language ('due by', 'deadline', 'must be done by', \ -'no later than', 'by [date]'), extract the date reference exactly as stated. \ -Examples: 'Friday', 'April 15th', 'end of March', 'end of next week'. \ -Empty string if no deadline is mentioned. +scheduledRef: A date or time reference for WHEN to do this task. \ +Extract the reference exactly as stated: 'today', 'tomorrow', 'this afternoon', \ +'Monday', 'this Friday', 'next week'. \ +Empty string if the input does not say when to do the task. -scheduledRef: If the input says when to do the task, extract the date reference exactly \ -as stated. Examples: 'today', 'tomorrow', 'Monday', 'this Friday', 'next week'. \ -Empty string if no timing is mentioned. Vague references ('for the week', 'soon') \ -are NOT scheduled dates — use empty string. +dueRef: A date or time reference for a DEADLINE. \ +Look for: 'by Friday', 'due April 15th', 'deadline is June 1st', 'end of March'. \ +Empty string if no deadline is mentioned. -deferUntilRef: If the input explicitly mentions deferring, extract the date reference. \ -Examples: 'after Monday', 'not until April'. This is rare. Empty string otherwise."; +deferUntilRef: A date reference for when this task BECOMES AVAILABLE. \ +Look for: 'not until Monday', 'defer until April', 'start after next week'. \ +This is rare. Empty string unless explicitly mentioned."; -/// Build few-shot examples. These are the highest-impact technique for small models. -/// Examples use raw date expressions (not YYYY-MM-DD) — Rust resolves them later. -fn build_examples_block(_today: &str) -> String { +/// Build few-shot examples. +fn build_examples_block() -> String { "\ Examples: Input: \"Buy groceries for the week\" -Output: {\"title\":\"Buy groceries\",\"body\":\"\",\"dueRef\":\"\",\"scheduledRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} +Output: {\"title\":\"Buy groceries\",\"body\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} Input: \"Call the dentist tomorrow about that crown\" -Output: {\"title\":\"Call dentist about crown\",\"body\":\"\",\"dueRef\":\"\",\"scheduledRef\":\"tomorrow\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} +Output: {\"title\":\"Call dentist about crown\",\"body\":\"\",\"scheduledRef\":\"tomorrow\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} Input: \"Finish the Newsletter Setup landing page by end of March\" -Output: {\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\"dueRef\":\"end of March\",\"scheduledRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"Newsletter Setup\",\"area\":\"\"}" +Output: {\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"end of March\",\"deferUntilRef\":\"\",\"project\":\"Newsletter Setup\",\"area\":\"\"} + +Input: \"Buy milk this afternoon\" +Output: {\"title\":\"Buy milk\",\"body\":\"\",\"scheduledRef\":\"today\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"}" .to_string() } -/// Build the structured context block showing areas and their projects. +/// Build the context block with separate area and project lists. fn build_context_block( projects_with_areas: &[ProjectWithArea], areas: &[NameIdPair], ) -> String { - // Group projects by area - let mut area_projects: std::collections::HashMap> = - std::collections::HashMap::new(); - let mut unassigned_projects: Vec = Vec::new(); - - for project in projects_with_areas { - if let Some(area_name) = &project.area_name { - area_projects - .entry(area_name.clone()) - .or_default() - .push(project.name.clone()); - } else { - unassigned_projects.push(project.name.clone()); - } - } - - let mut lines = vec!["Areas and projects:".to_string()]; - - for area in areas { - let projects = area_projects.get(&area.name); - match projects { - Some(p) if !p.is_empty() => { - lines.push(format!("- {}: {}", area.name, p.join(", "))); - } - _ => { - lines.push(format!("- {}", area.name)); - } - } - } - - if !unassigned_projects.is_empty() { - lines.push(format!( - "- (no area): {}", - unassigned_projects.join(", ") - )); - } - - lines.join("\n") + let area_names: Vec<&str> = areas.iter().map(|a| a.name.as_str()).collect(); + let project_names: Vec<&str> = projects_with_areas.iter().map(|p| p.name.as_str()).collect(); + + let areas_str = if area_names.is_empty() { + "(none)".to_string() + } else { + area_names.join(", ") + }; + + let projects_str = if project_names.is_empty() { + "(none)".to_string() + } else { + project_names.join(", ") + }; + + format!("Areas: {areas_str}\nProjects: {projects_str}") } diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 5afb22ee..0a29b401 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -12,13 +12,13 @@ private struct ParsedTask: Sendable { @Guide(description: "Extra detail, or empty string") let body: String - @Guide(description: "Date reference for deadline, or empty string") - let dueRef: String - - @Guide(description: "Date reference for when to do this, or empty string") + @Guide(description: "When to do this task, e.g. 'today' or 'next Monday', or empty string") let scheduledRef: String - @Guide(description: "Date reference for deferral, or empty string") + @Guide(description: "Deadline date reference, e.g. 'by Friday' or 'April 15th', or empty string") + let dueRef: String + + @Guide(description: "When task becomes available, e.g. 'after Monday', or empty string") let deferUntilRef: String @Guide(description: "Project name or empty string") @@ -55,8 +55,8 @@ private func parsedTaskToJSON(_ task: ParsedTask) -> String { let fields: [(String, String)] = [ ("title", task.title), ("body", task.body), - ("dueRef", task.dueRef), ("scheduledRef", task.scheduledRef), + ("dueRef", task.dueRef), ("deferUntilRef", task.deferUntilRef), ("project", task.project), ("area", task.area), From 00f79e270d96a9cd5eb281cf079def9591ec5ab6 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 03:25:01 +0000 Subject: [PATCH 25/32] Reorder fields and add few-shot examples for project/due matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reorder @Generable: project/area now before date fields (generated while input is still fresh in context) - Add project-matching example (Garden Renovation) - Add "by next Friday" due date example - Now 5 few-shot examples (Apple guidance: <5, this is at the limit) - Trim field definitions for smaller prompt footprint Eval improvement: ~15.5/31 (50%) → ~18.3/31 (59%) average across 3 runs. Project and due date extraction notably more reliable. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src-tauri/src/commands/ai_prompts.rs | 33 ++++++++++--------- .../src-tauri/swift/apple_intelligence.swift | 16 ++++----- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index ace4988b..6d818ddc 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -55,37 +55,40 @@ Set ONLY if the input explicitly names a project. Empty string otherwise. area: An area name from the areas list above. \ Set ONLY if the input explicitly names an area AND no project was matched. \ -When a project is set, leave area as empty string — the app handles the relationship. +When a project is set, leave area as empty string. -scheduledRef: A date or time reference for WHEN to do this task. \ -Extract the reference exactly as stated: 'today', 'tomorrow', 'this afternoon', \ -'Monday', 'this Friday', 'next week'. \ -Empty string if the input does not say when to do the task. +scheduledRef: WHEN to do this task. Extract the date reference as stated: \ +'today', 'tomorrow', 'this afternoon', 'Monday', 'this Friday', 'next week'. \ +Empty string if no timing is mentioned. -dueRef: A date or time reference for a DEADLINE. \ -Look for: 'by Friday', 'due April 15th', 'deadline is June 1st', 'end of March'. \ +dueRef: A DEADLINE. Extract the date reference as stated: \ +'by Friday', 'April 15th', 'end of March', 'end of next week'. \ Empty string if no deadline is mentioned. -deferUntilRef: A date reference for when this task BECOMES AVAILABLE. \ -Look for: 'not until Monday', 'defer until April', 'start after next week'. \ +deferUntilRef: When this task BECOMES AVAILABLE. \ +'not until Monday', 'defer until April', 'start after next week'. \ This is rare. Empty string unless explicitly mentioned."; -/// Build few-shot examples. +/// Build few-shot examples. Apple guidance: <5 examples, written directly into the prompt. +/// Field order matches @Generable: title, body, project, area, scheduledRef, dueRef, deferUntilRef fn build_examples_block() -> String { "\ Examples: Input: \"Buy groceries for the week\" -Output: {\"title\":\"Buy groceries\",\"body\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} +Output: {\"title\":\"Buy groceries\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"\",\"deferUntilRef\":\"\"} Input: \"Call the dentist tomorrow about that crown\" -Output: {\"title\":\"Call dentist about crown\",\"body\":\"\",\"scheduledRef\":\"tomorrow\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"} +Output: {\"title\":\"Call dentist about crown\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"scheduledRef\":\"tomorrow\",\"dueRef\":\"\",\"deferUntilRef\":\"\"} -Input: \"Finish the Newsletter Setup landing page by end of March\" -Output: {\"title\":\"Finish Newsletter Setup landing page\",\"body\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"end of March\",\"deferUntilRef\":\"\",\"project\":\"Newsletter Setup\",\"area\":\"\"} +Input: \"Review the Garden Renovation plans with the contractor\" +Output: {\"title\":\"Review Garden Renovation plans with contractor\",\"body\":\"\",\"project\":\"Garden Renovation\",\"area\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"\",\"deferUntilRef\":\"\"} + +Input: \"Submit the report by next Friday\" +Output: {\"title\":\"Submit report\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"next Friday\",\"deferUntilRef\":\"\"} Input: \"Buy milk this afternoon\" -Output: {\"title\":\"Buy milk\",\"body\":\"\",\"scheduledRef\":\"today\",\"dueRef\":\"\",\"deferUntilRef\":\"\",\"project\":\"\",\"area\":\"\"}" +Output: {\"title\":\"Buy milk\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"scheduledRef\":\"today\",\"dueRef\":\"\",\"deferUntilRef\":\"\"}" .to_string() } diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index 0a29b401..bab5d558 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -12,6 +12,12 @@ private struct ParsedTask: Sendable { @Guide(description: "Extra detail, or empty string") let body: String + @Guide(description: "Project name or empty string") + let project: String + + @Guide(description: "Area name or empty string") + let area: String + @Guide(description: "When to do this task, e.g. 'today' or 'next Monday', or empty string") let scheduledRef: String @@ -20,12 +26,6 @@ private struct ParsedTask: Sendable { @Guide(description: "When task becomes available, e.g. 'after Monday', or empty string") let deferUntilRef: String - - @Guide(description: "Project name or empty string") - let project: String - - @Guide(description: "Area name or empty string") - let area: String } // MARK: - Helpers @@ -55,11 +55,11 @@ private func parsedTaskToJSON(_ task: ParsedTask) -> String { let fields: [(String, String)] = [ ("title", task.title), ("body", task.body), + ("project", task.project), + ("area", task.area), ("scheduledRef", task.scheduledRef), ("dueRef", task.dueRef), ("deferUntilRef", task.deferUntilRef), - ("project", task.project), - ("area", task.area), ] let pairs = fields.map { (key, value) in From 20e9e3728f0bf3f2f6ba795da07783ad04905dab Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 03:44:58 +0000 Subject: [PATCH 26/32] Fix beach ball by making AI command async with spawn_blocking The process_quick_entry_text command was synchronous, which Tauri v2 runs on the main thread. The Swift FFI's DispatchSemaphore.wait() blocked the main thread for 2-3 seconds during inference, causing the beach ball cursor and preventing the spinner from rendering. Now the command is async, with the blocking FFI call wrapped in tauri::async_runtime::spawn_blocking. The main thread stays free so React can re-render (showing the spinner) and handle user input (Escape to dismiss). Extracts the synchronous logic into process_quick_entry_text_sync() which is also used by the eval harness. Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 21 +++++++++++++++++++-- tdn-desktop/src/lib/bindings.ts | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 4067f517..349e8762 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -58,12 +58,29 @@ pub fn check_apple_intelligence_available() -> bool { /// /// Takes the raw text from the quick entry title field, plus lists of available /// projects (with area relationships) and areas for context. +/// +/// This command is async to avoid blocking the main thread — the Swift FFI call +/// uses a DispatchSemaphore which blocks for 2-3 seconds during inference. #[tauri::command] #[specta::specta] -pub fn process_quick_entry_text( +pub async fn process_quick_entry_text( text: String, projects: Vec, areas: Vec, +) -> Result { + // Move the blocking FFI work off the main thread + tauri::async_runtime::spawn_blocking(move || { + process_quick_entry_text_sync(&text, &projects, &areas) + }) + .await + .map_err(|e| format!("Task join error: {e}"))? +} + +/// Synchronous implementation — called from spawn_blocking and from the eval harness. +pub(crate) fn process_quick_entry_text_sync( + text: &str, + projects: &[ProjectContext], + areas: &[NameIdPair], ) -> Result { let trimmed = text.trim(); if trimmed.is_empty() { @@ -95,7 +112,7 @@ pub fn process_quick_entry_text( log::info!("Raw response: {response}"); - let mut result = parse_ai_response(&response, trimmed, &projects, &areas, today.date_naive())?; + let mut result = parse_ai_response(&response, trimmed, projects, areas, today.date_naive())?; // Determine status via keyword detection (not LLM) result.status = detect_status_from_keywords(trimmed).to_string(); diff --git a/tdn-desktop/src/lib/bindings.ts b/tdn-desktop/src/lib/bindings.ts index dbdf210c..b6db00b1 100644 --- a/tdn-desktop/src/lib/bindings.ts +++ b/tdn-desktop/src/lib/bindings.ts @@ -365,6 +365,9 @@ async checkAppleIntelligenceAvailable() : Promise { * * Takes the raw text from the quick entry title field, plus lists of available * projects (with area relationships) and areas for context. + * + * This command is async to avoid blocking the main thread — the Swift FFI call + * uses a DispatchSemaphore which blocks for 2-3 seconds during inference. */ async processQuickEntryText(text: string, projects: ProjectContext[], areas: NameIdPair[]) : Promise> { try { From 9d241bbbf82530afb8d8e3913217a2f31fefca56 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 03:57:59 +0000 Subject: [PATCH 27/32] Update all docs for AI processing, auto-ready, and polish Developer docs: - apple-intelligence.md: fix stale struct order, remove non-evergreen test counts, add "as of" framing for eval baseline, fix context block description - quick-panes.md: add AI shortcut, auto-ready, and Apple Intelligence sections User-facing docs: - quick-entry-pane.mdx: add auto-ready and AI processing sections - keyboard-shortcuts.mdx: add Cmd+Shift+A shortcut Task doc: mark Phase 11 complete. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../task-x-quick-entry-ai-processing.md | 13 ++++++------ .../docs/developer/apple-intelligence.md | 20 +++++++++++-------- tdn-desktop/docs/developer/quick-panes.md | 11 +++++++++- .../content/docs/desktop/quick-entry-pane.mdx | 14 +++++++++++++ .../desktop-reference/keyboard-shortcuts.mdx | 1 + 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 326bb221..3540d461 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -162,12 +162,11 @@ Key areas to improve: - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) -- Very long input handling (context window limits?) -### Phase 11: Docs +### Phase 11: Docs ✅ -- Update developer quick-entry pane docs as needed -- Update userguide page on Quick Entry pane to mention: - A) Auto-setting of status to Ready when (project || area) && (scheduled || defer-until) are set - B) Basic explanation of how the sparkle button works and what it's for, and when it's available -- Update apple-intelligence.md developer doc so it's accurate about how things currently work, including the eval harness +Done. Updated: +- `tdn-desktop/docs/developer/quick-panes.md` — added AI shortcut, auto-ready, and Apple Intelligence integration sections +- `tdn-desktop/docs/developer/apple-intelligence.md` — updated eval baseline, added async/spawn_blocking note +- `website/src/content/docs/desktop/quick-entry-pane.mdx` — added auto-ready and AI processing sections +- `website/src/content/docs/reference/desktop-reference/keyboard-shortcuts.mdx` — added `Cmd+Shift+A` shortcut diff --git a/tdn-desktop/docs/developer/apple-intelligence.md b/tdn-desktop/docs/developer/apple-intelligence.md index 07ea7417..fa29a385 100644 --- a/tdn-desktop/docs/developer/apple-intelligence.md +++ b/tdn-desktop/docs/developer/apple-intelligence.md @@ -30,7 +30,7 @@ The Tauri command `process_quick_entry_text` receives the raw text and context. - Today's date and day of week - A structured list of areas and their projects (e.g. "Acme Corp: Acme Dashboard Redesign") - Per-field instructions explaining when to set each field and when to leave it empty -- 2-3 few-shot examples showing input text → expected JSON output, including an example where most fields are empty +- Few-shot examples showing input text → expected JSON output, including examples with empty fields The few-shot examples are the single highest-impact part of the prompt. They teach the model the expected output format and, critically, that leaving fields empty is the right thing to do when information isn't present. @@ -50,6 +50,8 @@ If `@Generable` succeeds (the normal path), the typed `ParsedTask` struct is man Because the Swift call is `async` but the C FFI is synchronous, a `DispatchSemaphore` bridges the two. A detached task runs the inference, signals the semaphore on completion, and the calling thread blocks until it's done. This takes ~2-3 seconds on Apple Silicon. +The Tauri command is `async` and wraps the blocking FFI call in `tauri::async_runtime::spawn_blocking` to keep the main thread free (avoiding the beach ball cursor and allowing React to render the loading spinner). + ### 6. Rust parses, resolves, and validates the response Back in Rust, `parse_ai_response()` processes the JSON string through several stages: @@ -159,7 +161,7 @@ The command builds the system prompt, calls the FFI, parses the response, resolv `src/commands/ai_prompts.rs` centralizes all prompt text. This is the primary file to edit when iterating on prompt quality. It contains: - `build_system_prompt()` — Assembles the complete prompt from role text, context, field instructions, and few-shot examples -- `build_context_block()` — Formats areas and their projects as a structured list +- `build_context_block()` — Formats areas and projects as separate lists for the prompt - `build_examples_block()` — Few-shot input→output pairs showing raw date expression extraction ### Date Resolution and Fuzzy Matching @@ -177,21 +179,23 @@ The command builds the system prompt, calls the FFI, parses the response, resolv struct ParsedTask: Sendable { let title: String // concise task title let body: String // extra detail, or empty string - let dueRef: String // raw deadline expression, or empty string - let scheduledRef: String // raw scheduling expression, or empty string - let deferUntilRef: String // raw deferral expression, or empty string let project: String // project name or empty string let area: String // area name or empty string + let scheduledRef: String // raw scheduling expression, or empty string + let dueRef: String // raw deadline expression, or empty string + let deferUntilRef: String // raw deferral expression, or empty string } ``` +Properties generate in declaration order. Project/area are placed before dates so the model considers them while the input is still fresh in context. + `@Generable` uses constrained decoding — the model's token generation is structurally constrained to produce valid output matching the struct. Note: **status is not in the struct** — it was removed because the model was inconsistent with it. Status is now determined by keyword detection in Rust and auto-ready rules in the frontend. Date fields are `*Ref` fields containing raw expressions ("tomorrow", "next Monday", "end of March") rather than YYYY-MM-DD dates. The model is good at text extraction but bad at date arithmetic, so date computation is done deterministically in Rust. -Each field has a `@Guide(description:)` annotation providing a short hint. The system prompt carries the detailed decision-making instructions. Properties generate in declaration order. +Each field has a `@Guide(description:)` annotation providing a short hint. The system prompt carries the detailed decision-making instructions. ## Frontend Integration @@ -252,7 +256,7 @@ Takes ~50 seconds (31 LLM calls). Prints a per-case pass/fail summary with raw v The harness does NOT assert on failure — it's a measurement tool, not a hard test. Some failures are expected while iterating on prompts. -Current baseline: **16/31 passing**. +As of March 2026, **~18/31 eval tests pass** — the remaining failures are mostly the model inconsistently extracting date expressions and project names from input. Run the eval across multiple runs to account for non-determinism. ### Unit tests @@ -262,7 +266,7 @@ Deterministic logic (date resolution, fuzzy matching, keyword status detection) cd tdn-desktop/src-tauri && cargo test --lib ``` -Currently 263+ tests including 19 for date resolution/fuzzy matching and 8 for keyword status detection. +These cover date resolution patterns, fuzzy project/area matching, and keyword-based status detection. ## Known Limitations diff --git a/tdn-desktop/docs/developer/quick-panes.md b/tdn-desktop/docs/developer/quick-panes.md index 5f8ef002..61793be3 100644 --- a/tdn-desktop/docs/developer/quick-panes.md +++ b/tdn-desktop/docs/developer/quick-panes.md @@ -84,7 +84,7 @@ src/components/quick-pane/ | ----------------- | -------------------------------------------------------------------- | | QuickPaneApp | Form state, submission logic, popover coordination, focus management | | QuickPaneCard | Visual container, CSS animations for show/hide | -| QuickPaneTitle | Title input with auto-resize | +| QuickPaneTitle | Title input with auto-resize, AI sparkle button | | QuickPaneBody | Collapsible notes with expand/collapse animation | | QuickPaneMetadata | Status and date selection (controlled popovers) | | QuickPaneFooter | Project/area selection, action buttons | @@ -109,9 +109,18 @@ src/components/quick-pane/ | `⌘ ⇧ D` | Open due date picker | | `⌃ ⇧ ⌘ D` | Open defer date picker | | `⌘ S` | Open status picker | +| `⌘ ⇧ A` | Process with AI (macOS only) | The `useQuickPaneKeyboard` hook handles all shortcuts using capture phase to intercept events before popovers receive them. +## Auto-Ready Status + +A `useEffect` in `QuickPaneApp` watches `[projectId, areaId, scheduled, deferUntil]`. When `(project OR area) AND (scheduled OR defer-until)` are set and status is `inbox`, it auto-promotes to `ready`. This applies to all quick entry (manual and AI-assisted). + +## Apple Intelligence Integration + +On macOS with Apple Intelligence, a sparkle button appears in the title row when the user has typed text. Pressing it (or `⌘⇧A`) sends the text through on-device AI processing to extract structured task fields. See `docs/developer/apple-intelligence.md` for the full architecture. + ## Platform Behavior | Platform | Panel Type | Fullscreen Overlay | Dismiss Behavior | diff --git a/website/src/content/docs/desktop/quick-entry-pane.mdx b/website/src/content/docs/desktop/quick-entry-pane.mdx index 5aded49b..01e456ed 100644 --- a/website/src/content/docs/desktop/quick-entry-pane.mdx +++ b/website/src/content/docs/desktop/quick-entry-pane.mdx @@ -36,3 +36,17 @@ While the majority of tasks created this way probably won't have a body, pressin
Quick entry pane with body expanded
+ +## Auto-ready status + +When you set a project or area **and** a scheduled or defer-until date, the status automatically changes from `inbox` to `ready`. A task with both a project/area and a date has been processed enough that it doesn't need to sit in the inbox. + +## AI processing (macOS) + +On Macs with Apple Intelligence enabled, a sparkle button appears next to the title when you've typed something. Clicking it (or pressing ) uses on-device AI to clean up your text into a concise task title and fill in the appropriate project, area, dates and other fields based on what you typed or dictated. + +This works entirely on-device — no data is sent to any server. The AI pre-fills the form fields for you to review and adjust before saving. + + diff --git a/website/src/content/docs/reference/desktop-reference/keyboard-shortcuts.mdx b/website/src/content/docs/reference/desktop-reference/keyboard-shortcuts.mdx index 20cd2b62..80d7440b 100644 --- a/website/src/content/docs/reference/desktop-reference/keyboard-shortcuts.mdx +++ b/website/src/content/docs/reference/desktop-reference/keyboard-shortcuts.mdx @@ -106,6 +106,7 @@ These shortcuts work when the [Quick Entry Pane](/desktop/quick-entry-pane/) is | | Open due date picker | | | Open defer date picker | | | Open status picker | +| | Process with AI (macOS only, requires Apple Intelligence) | | | Close pane (or close open picker) | ## Keyboard Navigation From 4a4fef42112e5a1b8aac6383d8f5fd5433043ad0 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 04:05:41 +0000 Subject: [PATCH 28/32] Code cleanup: fix formatting, clippy warnings, remove debug test - cargo fmt across all new Rust files - Fix 4 clippy warnings: needless borrow, char comparison pattern, trim before split_whitespace, dead_code allow annotation - Remove date_explore_fuzzydate debug exploration test - Prettier formatting for QuickPaneApp.tsx and apple-intelligence.md Co-Authored-By: Claude Opus 4.6 (1M context) --- .../docs/developer/apple-intelligence.md | 5 +- tdn-desktop/src-tauri/src/commands/ai.rs | 273 +++++++++++++----- .../src-tauri/src/commands/ai_prompts.rs | 13 +- .../src-tauri/src/commands/ai_resolve.rs | 65 ++--- .../components/quick-pane/QuickPaneApp.tsx | 3 +- 5 files changed, 237 insertions(+), 122 deletions(-) diff --git a/tdn-desktop/docs/developer/apple-intelligence.md b/tdn-desktop/docs/developer/apple-intelligence.md index fa29a385..4dc529dc 100644 --- a/tdn-desktop/docs/developer/apple-intelligence.md +++ b/tdn-desktop/docs/developer/apple-intelligence.md @@ -56,7 +56,7 @@ The Tauri command is `async` and wraps the blocking FFI call in `tauri::async_ru Back in Rust, `parse_ai_response()` processes the JSON string through several stages: -**Code fence stripping:** If the fallback path produced a markdown-wrapped JSON block (`` ```json...``` ``), the fences are stripped so `serde_json` can parse it. +**Code fence stripping:** If the fallback path produced a markdown-wrapped JSON block (` ```json...``` `), the fences are stripped so `serde_json` can parse it. **Title extraction:** The model's title is used. If JSON parsing failed entirely, the original input text becomes the title. @@ -67,6 +67,7 @@ Back in Rust, `parse_ai_response()` processes the JSON string through several st **Project/area matching (`ai_resolve.rs`):** The model returns a name string. Rust first tries case-insensitive exact match, then falls back to case-insensitive substring match (minimum 3 characters). This handles the common case where the model returns a truncated name ("Japan Trip" matches "Japan Trip 2025"). No match → field is left empty. **Status determination:** Status is NOT set by the LLM. Instead: + 1. `detect_status_from_keywords()` scans the original input text for explicit status phrases: `blocked` / `waiting on` → blocked, `icebox` / `ice box` → icebox, `in progress` / `in-progress` → in-progress. Everything else → inbox. 2. The frontend applies auto-ready rules after (see step 7). @@ -145,12 +146,14 @@ The Swift code bridges async/await to synchronous C using `DispatchSemaphore` + ### Rust FFI Wrapper `src/apple_intelligence.rs` provides safe Rust functions over the unsafe C FFI: + - `check_availability()` → `bool` - `process_text(system_prompt, user_content, max_tokens)` → `Result` ### Tauri Commands `src/commands/ai.rs` exposes two commands to the frontend: + - `check_apple_intelligence_available()` → `bool` - `process_quick_entry_text(text, projects, areas)` → `Result` diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 349e8762..0dc20c8b 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -101,8 +101,12 @@ pub(crate) fn process_quick_entry_text_sync( }) .collect(); - let system_prompt = - super::ai_prompts::build_system_prompt(&projects_with_areas, &areas, &date_str, &day_of_week); + let system_prompt = super::ai_prompts::build_system_prompt( + &projects_with_areas, + areas, + &date_str, + &day_of_week, + ); log::info!("── AI Quick Entry ──────────────────────────────────"); log::info!("Input: {trimmed:?}"); @@ -112,7 +116,8 @@ pub(crate) fn process_quick_entry_text_sync( log::info!("Raw response: {response}"); - let mut result = parse_ai_response(&response, trimmed, projects, areas, today.date_naive())?; + let mut result = + parse_ai_response(&response, trimmed, projects, areas, today.date_naive())?; // Determine status via keyword detection (not LLM) result.status = detect_status_from_keywords(trimmed).to_string(); @@ -153,10 +158,7 @@ fn strip_code_fences(s: &str) -> &str { // Skip the language tag (e.g. "json") on the first line let after_tag = rest.find('\n').map(|i| &rest[i + 1..]).unwrap_or(rest); // Strip trailing fence - after_tag - .strip_suffix("```") - .unwrap_or(after_tag) - .trim() + after_tag.strip_suffix("```").unwrap_or(after_tag).trim() } else { trimmed } @@ -196,9 +198,7 @@ fn parse_ai_response( } else { // Title was transformed — preserve original text in body // Don't append AI body if it's just parroting the input - if body_from_ai.is_empty() - || is_essentially_same(&body_from_ai, original_text.trim()) - { + if body_from_ai.is_empty() || is_essentially_same(&body_from_ai, original_text.trim()) { original_text.trim().to_string() } else { format!("{}\n\n{}", original_text.trim(), body_from_ai) @@ -258,11 +258,7 @@ fn is_essentially_same(a: &str, b: &str) -> bool { if a.is_empty() || b.is_empty() { return a.is_empty() && b.is_empty(); } - let normalize = |s: &str| { - s.trim() - .trim_end_matches(|c: char| c == '.' || c == '!' || c == '?') - .to_lowercase() - }; + let normalize = |s: &str| s.trim().trim_end_matches(['.', '!', '?']).to_lowercase(); normalize(a) == normalize(b) } @@ -280,18 +276,12 @@ pub fn detect_status_from_keywords(input: &str) -> &'static str { let lower = input.to_lowercase(); // Check for blocked — explicit blocking language - if lower.contains("blocked") - || lower.contains("waiting on") - || lower.contains("waitingon") - { + if lower.contains("blocked") || lower.contains("waiting on") || lower.contains("waitingon") { return "blocked"; } // Check for icebox — only very explicit mentions - if lower.contains("icebox") - || lower.contains("ice box") - || lower.contains("ice-box") - { + if lower.contains("icebox") || lower.contains("ice box") || lower.contains("ice-box") { return "icebox"; } @@ -317,56 +307,107 @@ mod tests { #[test] fn keyword_default_is_inbox() { assert_eq!(detect_status_from_keywords("Buy groceries"), "inbox"); - assert_eq!(detect_status_from_keywords("Call the dentist tomorrow"), "inbox"); + assert_eq!( + detect_status_from_keywords("Call the dentist tomorrow"), + "inbox" + ); assert_eq!(detect_status_from_keywords("Review the mockups"), "inbox"); } #[test] fn keyword_detects_blocked() { - assert_eq!(detect_status_from_keywords("This is blocked by the security review"), "blocked"); - assert_eq!(detect_status_from_keywords("Waiting on the client to respond"), "blocked"); - assert_eq!(detect_status_from_keywords("waitingon client response"), "blocked"); + assert_eq!( + detect_status_from_keywords("This is blocked by the security review"), + "blocked" + ); + assert_eq!( + detect_status_from_keywords("Waiting on the client to respond"), + "blocked" + ); + assert_eq!( + detect_status_from_keywords("waitingon client response"), + "blocked" + ); } #[test] fn keyword_blocked_is_narrow() { - assert_eq!(detect_status_from_keywords("Can't proceed until we get approval"), "inbox"); - assert_eq!(detect_status_from_keywords("Stuck on the API migration"), "inbox"); + assert_eq!( + detect_status_from_keywords("Can't proceed until we get approval"), + "inbox" + ); + assert_eq!( + detect_status_from_keywords("Stuck on the API migration"), + "inbox" + ); } #[test] fn keyword_detects_icebox() { - assert_eq!(detect_status_from_keywords("Icebox task to learn piano"), "icebox"); - assert_eq!(detect_status_from_keywords("Put this in the ice box"), "icebox"); - assert_eq!(detect_status_from_keywords("ice-box this for later"), "icebox"); + assert_eq!( + detect_status_from_keywords("Icebox task to learn piano"), + "icebox" + ); + assert_eq!( + detect_status_from_keywords("Put this in the ice box"), + "icebox" + ); + assert_eq!( + detect_status_from_keywords("ice-box this for later"), + "icebox" + ); } #[test] fn keyword_icebox_is_narrow() { assert_eq!(detect_status_from_keywords("Maybe call the bank"), "inbox"); - assert_eq!(detect_status_from_keywords("I might need to do this"), "inbox"); + assert_eq!( + detect_status_from_keywords("I might need to do this"), + "inbox" + ); assert_eq!(detect_status_from_keywords("One day learn guitar"), "inbox"); - assert_eq!(detect_status_from_keywords("Eventually get around to it"), "inbox"); + assert_eq!( + detect_status_from_keywords("Eventually get around to it"), + "inbox" + ); } #[test] fn keyword_detects_in_progress() { - assert_eq!(detect_status_from_keywords("This is in progress"), "in-progress"); - assert_eq!(detect_status_from_keywords("Mark as in-progress"), "in-progress"); - assert_eq!(detect_status_from_keywords("inprogress task"), "in-progress"); + assert_eq!( + detect_status_from_keywords("This is in progress"), + "in-progress" + ); + assert_eq!( + detect_status_from_keywords("Mark as in-progress"), + "in-progress" + ); + assert_eq!( + detect_status_from_keywords("inprogress task"), + "in-progress" + ); } #[test] fn keyword_in_progress_is_narrow() { - assert_eq!(detect_status_from_keywords("Already started the refactor"), "inbox"); - assert_eq!(detect_status_from_keywords("Working on the dashboard"), "inbox"); + assert_eq!( + detect_status_from_keywords("Already started the refactor"), + "inbox" + ); + assert_eq!( + detect_status_from_keywords("Working on the dashboard"), + "inbox" + ); } #[test] fn keyword_case_insensitive() { assert_eq!(detect_status_from_keywords("This is BLOCKED"), "blocked"); assert_eq!(detect_status_from_keywords("ICEBOX this task"), "icebox"); - assert_eq!(detect_status_from_keywords("IN PROGRESS refactor"), "in-progress"); + assert_eq!( + detect_status_from_keywords("IN PROGRESS refactor"), + "in-progress" + ); } } @@ -394,30 +435,97 @@ mod eval { fn eval_projects() -> Vec { vec![ - ProjectContext { id: "p-japan".into(), name: "Japan Trip 2025".into(), area_name: Some("Travel".into()) }, - ProjectContext { id: "p-acme".into(), name: "Acme Dashboard Redesign".into(), area_name: Some("Acme Corp".into()) }, - ProjectContext { id: "p-tax".into(), name: "Q1 Tax Preparation".into(), area_name: Some("Finance".into()) }, - ProjectContext { id: "p-blog".into(), name: "Tech Blog Relaunch".into(), area_name: Some("Writing".into()) }, - ProjectContext { id: "p-cli".into(), name: "Open Source CLI Tool".into(), area_name: Some("Coding".into()) }, - ProjectContext { id: "p-marathon".into(), name: "Half Marathon Training".into(), area_name: Some("Health".into()) }, - ProjectContext { id: "p-office".into(), name: "Home Office Setup".into(), area_name: Some("Home".into()) }, - ProjectContext { id: "p-garden".into(), name: "Garden Renovation".into(), area_name: Some("Home".into()) }, - ProjectContext { id: "p-newsletter".into(), name: "Newsletter Setup".into(), area_name: Some("Writing".into()) }, - ProjectContext { id: "p-rust".into(), name: "Learn Rust".into(), area_name: Some("Learning".into()) }, + ProjectContext { + id: "p-japan".into(), + name: "Japan Trip 2025".into(), + area_name: Some("Travel".into()), + }, + ProjectContext { + id: "p-acme".into(), + name: "Acme Dashboard Redesign".into(), + area_name: Some("Acme Corp".into()), + }, + ProjectContext { + id: "p-tax".into(), + name: "Q1 Tax Preparation".into(), + area_name: Some("Finance".into()), + }, + ProjectContext { + id: "p-blog".into(), + name: "Tech Blog Relaunch".into(), + area_name: Some("Writing".into()), + }, + ProjectContext { + id: "p-cli".into(), + name: "Open Source CLI Tool".into(), + area_name: Some("Coding".into()), + }, + ProjectContext { + id: "p-marathon".into(), + name: "Half Marathon Training".into(), + area_name: Some("Health".into()), + }, + ProjectContext { + id: "p-office".into(), + name: "Home Office Setup".into(), + area_name: Some("Home".into()), + }, + ProjectContext { + id: "p-garden".into(), + name: "Garden Renovation".into(), + area_name: Some("Home".into()), + }, + ProjectContext { + id: "p-newsletter".into(), + name: "Newsletter Setup".into(), + area_name: Some("Writing".into()), + }, + ProjectContext { + id: "p-rust".into(), + name: "Learn Rust".into(), + area_name: Some("Learning".into()), + }, ] } fn eval_areas() -> Vec { vec![ - NameIdPair { id: "a-travel".into(), name: "Travel".into() }, - NameIdPair { id: "a-acme".into(), name: "Acme Corp".into() }, - NameIdPair { id: "a-finance".into(), name: "Finance".into() }, - NameIdPair { id: "a-writing".into(), name: "Writing".into() }, - NameIdPair { id: "a-coding".into(), name: "Coding".into() }, - NameIdPair { id: "a-health".into(), name: "Health".into() }, - NameIdPair { id: "a-home".into(), name: "Home".into() }, - NameIdPair { id: "a-learning".into(), name: "Learning".into() }, - NameIdPair { id: "a-marketing".into(), name: "Marketing".into() }, + NameIdPair { + id: "a-travel".into(), + name: "Travel".into(), + }, + NameIdPair { + id: "a-acme".into(), + name: "Acme Corp".into(), + }, + NameIdPair { + id: "a-finance".into(), + name: "Finance".into(), + }, + NameIdPair { + id: "a-writing".into(), + name: "Writing".into(), + }, + NameIdPair { + id: "a-coding".into(), + name: "Coding".into(), + }, + NameIdPair { + id: "a-health".into(), + name: "Health".into(), + }, + NameIdPair { + id: "a-home".into(), + name: "Home".into(), + }, + NameIdPair { + id: "a-learning".into(), + name: "Learning".into(), + }, + NameIdPair { + id: "a-marketing".into(), + name: "Marketing".into(), + }, ] } @@ -478,7 +586,11 @@ mod eval { let mut failures = Vec::new(); // Check title - if !result.title.to_lowercase().contains(&expected.title_contains.to_lowercase()) { + if !result + .title + .to_lowercase() + .contains(&expected.title_contains.to_lowercase()) + { failures.push(format!( "title: expected to contain {:?}, got {:?}", expected.title_contains, result.title @@ -525,16 +637,18 @@ mod eval { } None => { if result.area_id.is_some() { - failures.push(format!( - "area: expected None, got {:?}", - result.area_id - )); + failures.push(format!("area: expected None, got {:?}", result.area_id)); } } } // Check dates - check_date_field("scheduled", &result.scheduled, expected.scheduled, &mut failures); + check_date_field( + "scheduled", + &result.scheduled, + expected.scheduled, + &mut failures, + ); check_date_field("due", &result.due, expected.due, &mut failures); check_date_field("defer", &result.defer_until, expected.defer, &mut failures); @@ -561,16 +675,12 @@ mod eval { match expected { Some(date) => { if actual.as_deref() != Some(date) { - failures.push(format!( - "{name}: expected Some({date:?}), got {actual:?}" - )); + failures.push(format!("{name}: expected Some({date:?}), got {actual:?}")); } } None => { if actual.is_some() { - failures.push(format!( - "{name}: expected None, got {actual:?}" - )); + failures.push(format!("{name}: expected None, got {actual:?}")); } } } @@ -588,7 +698,8 @@ mod eval { // deterministic Rust code (if title differs from input, original text goes in // body), so it's not testing LLM quality. Use Some(true) only when the title // is very likely to be identical to input (i.e. input is already a clean title). - let cases: Vec<(&str, Expected)> = vec![ + let cases: Vec<(&str, Expected)> = + vec![ // ============================================================= // SIMPLE INPUTS — no metadata expected @@ -944,10 +1055,14 @@ mod eval { } else { total_fail += 1; println!(" ✗ {input:?}"); - println!(" Raw: title={:?} status={:?} project={:?} area={:?}", - result.title, result.status, result.project_id, result.area_id); - println!(" due={:?} sched={:?} defer={:?}", - result.due, result.scheduled, result.defer_until); + println!( + " Raw: title={:?} status={:?} project={:?} area={:?}", + result.title, result.status, result.project_id, result.area_id + ); + println!( + " due={:?} sched={:?} defer={:?}", + result.due, result.scheduled, result.defer_until + ); for f in &failures { println!(" FAIL: {f}"); } @@ -955,8 +1070,10 @@ mod eval { } println!("\n----------------------------------------------------------------------"); - println!("Results: {total_pass} passed, {total_fail} failed out of {} cases", - cases.len()); + println!( + "Results: {total_pass} passed, {total_fail} failed out of {} cases", + cases.len() + ); println!("----------------------------------------------------------------------\n"); // Don't assert — this is an eval tool, not a hard test. diff --git a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs index 6d818ddc..bd79b7e4 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_prompts.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_prompts.rs @@ -5,7 +5,8 @@ use super::ai::NameIdPair; -/// A project with its area relationship, for richer context in the prompt. +/// A project for the prompt context. +#[allow(dead_code)] // area_name reserved for potential grouped context display pub struct ProjectWithArea { pub name: String, pub area_name: Option, @@ -93,12 +94,12 @@ Output: {\"title\":\"Buy milk\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"s } /// Build the context block with separate area and project lists. -fn build_context_block( - projects_with_areas: &[ProjectWithArea], - areas: &[NameIdPair], -) -> String { +fn build_context_block(projects_with_areas: &[ProjectWithArea], areas: &[NameIdPair]) -> String { let area_names: Vec<&str> = areas.iter().map(|a| a.name.as_str()).collect(); - let project_names: Vec<&str> = projects_with_areas.iter().map(|p| p.name.as_str()).collect(); + let project_names: Vec<&str> = projects_with_areas + .iter() + .map(|p| p.name.as_str()) + .collect(); let areas_str = if area_names.is_empty() { "(none)".to_string() diff --git a/tdn-desktop/src-tauri/src/commands/ai_resolve.rs b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs index b2e1ea6d..d57d50c4 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_resolve.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs @@ -91,9 +91,18 @@ fn resolve_end_of_month(expr: &str, today: NaiveDate) -> Option { // "end of March", "end of April", etc. let months = [ - ("january", 1), ("february", 2), ("march", 3), ("april", 4), - ("may", 5), ("june", 6), ("july", 7), ("august", 8), - ("september", 9), ("october", 10), ("november", 11), ("december", 12), + ("january", 1), + ("february", 2), + ("march", 3), + ("april", 4), + ("may", 5), + ("june", 6), + ("july", 7), + ("august", 8), + ("september", 9), + ("october", 10), + ("november", 11), + ("december", 12), ]; if let Some(rest) = lower.strip_prefix("end of ") { @@ -134,7 +143,7 @@ fn resolve_in_n_weeks(expr: &str, today: NaiveDate) -> Option { // "in N weeks" / "in N week" if let Some(rest) = lower.strip_prefix("in ") { - let parts: Vec<&str> = rest.trim().split_whitespace().collect(); + let parts: Vec<&str> = rest.split_whitespace().collect(); if parts.len() == 2 { if let Some(n) = word_to_num(parts[0]) { if parts[1].starts_with("week") { @@ -157,11 +166,9 @@ fn resolve_in_n_weeks(expr: &str, today: NaiveDate) -> Option { /// Get the last day of a given month. fn last_day_of_month(year: i32, month: u32) -> Option { if month == 12 { - NaiveDate::from_ymd_opt(year + 1, 1, 1) - .and_then(|d| d.pred_opt()) + NaiveDate::from_ymd_opt(year + 1, 1, 1).and_then(|d| d.pred_opt()) } else { - NaiveDate::from_ymd_opt(year, month + 1, 1) - .and_then(|d| d.pred_opt()) + NaiveDate::from_ymd_opt(year, month + 1, 1).and_then(|d| d.pred_opt()) } } @@ -327,23 +334,6 @@ mod tests { assert_eq!(resolve_date_expression("banana", test_date()), None); } - #[test] - #[ignore] - fn date_explore_fuzzydate() { - let today = test_date(); - let cases = vec![ - "April 15", "April 15th", "15 April", "15th April", - "March 31", "March 31st", "end of March", "end of the month", - "Friday", "this Friday", "next Friday", - "in 3 weeks", "in two weeks", "in 2 weeks", - "Thursday", "on Thursday", - ]; - for c in cases { - let result = resolve_date_expression(c, today); - println!(" {:30} → {:?}", c, result); - } - } - // ── Project fuzzy matching tests ───────────────────────────────────── fn test_projects() -> Vec { @@ -426,28 +416,31 @@ mod tests { fn test_areas() -> Vec { vec![ - NameIdPair { id: "a-acme".into(), name: "Acme Corp".into() }, - NameIdPair { id: "a-finance".into(), name: "Finance".into() }, - NameIdPair { id: "a-home".into(), name: "Home".into() }, + NameIdPair { + id: "a-acme".into(), + name: "Acme Corp".into(), + }, + NameIdPair { + id: "a-finance".into(), + name: "Finance".into(), + }, + NameIdPair { + id: "a-home".into(), + name: "Home".into(), + }, ] } #[test] fn area_exact_match() { let areas = test_areas(); - assert_eq!( - match_area_fuzzy("Acme Corp", &areas), - Some("a-acme".into()) - ); + assert_eq!(match_area_fuzzy("Acme Corp", &areas), Some("a-acme".into())); } #[test] fn area_substring_match() { let areas = test_areas(); - assert_eq!( - match_area_fuzzy("Acme", &areas), - Some("a-acme".into()) - ); + assert_eq!(match_area_fuzzy("Acme", &areas), Some("a-acme".into())); } #[test] diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 935f2889..3a68404b 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -321,7 +321,8 @@ export default function QuickPaneApp() { const todayDate = new Date(todayStr + 'T00:00:00') const scheduledDate = new Date(parsed.scheduled + 'T00:00:00') const daysUntil = Math.round( - (scheduledDate.getTime() - todayDate.getTime()) / (1000 * 60 * 60 * 24) + (scheduledDate.getTime() - todayDate.getTime()) / + (1000 * 60 * 60 * 24) ) if (daysUntil >= 0 && daysUntil <= 7) { setStatus('ready') From 5eb430c43568fbdcd916fe8c06913485e0cc18ca Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 04:06:24 +0000 Subject: [PATCH 29/32] Update task doc --- docs/tasks-todo/task-x-quick-entry-ai-processing.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tasks-todo/task-x-quick-entry-ai-processing.md b/docs/tasks-todo/task-x-quick-entry-ai-processing.md index 3540d461..fa5eeb66 100644 --- a/docs/tasks-todo/task-x-quick-entry-ai-processing.md +++ b/docs/tasks-todo/task-x-quick-entry-ai-processing.md @@ -148,7 +148,7 @@ Done. The LLM now extracts raw date expressions ("tomorrow", "next Monday", "end - LLM fills in parent area when only project should be set (hallucination) - These are prompt refinement problems, not resolution problems -### Phase 9: Prompt Refinement +### Phase 9: Prompt Refinement ✅ Iterate on the system prompt and few-shot examples to improve the LLM's extraction reliability. The eval harness (`cargo test eval_ai --lib -- --ignored --nocapture` from `src-tauri/`) makes this a fast feedback loop — edit `ai_prompts.rs`, rebuild, run eval, compare results. @@ -158,7 +158,7 @@ Key areas to improve: - LLM hallucinating area when only project is referenced (fills in parent area) - Consider whether additional few-shot examples showing date ref extraction would help -### Phase 10: Polish and Edge Cases +### Phase 10: Polish and Edge Cases ✅ - Re-processing support (user processes, edits title, processes again) - Cancellation during processing (Escape while LLM is running) From 001718b9f3b6ef0e7b85ce207f8669d11f2495b2 Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 15:22:48 +0000 Subject: [PATCH 30/32] Address code review findings from CodeRabbit build.rs: - Check xcrun exit status before consuming stdout; surface stderr on failure instead of producing empty paths ai.rs: - Word-boundary matching for "blocked" keyword to prevent false positives on "unblocked" - Empty/whitespace title fallback to original input - Remove cfg(macos/aarch64) gates from parsing helpers so they compile and are testable on all platforms - Add unit tests for strip_code_fences, is_essentially_same, parse_ai_response fallbacks (7 new tests) ai_resolve.rs: - Proper calendar month arithmetic instead of 30-day approximation (handles month-end clamping, e.g. Jan 31 + 1 month = Feb 28) - Preprocess input before custom resolvers so "by end of March" works - Add regression tests for month arithmetic and preprocessing (3 new) ai_prompts.rs / ai_resolve.rs: - Reduce module visibility from pub to pub(crate) manager.rs: - Single read lock for project+area resolution in update_task (was taking two separate locks) QuickPaneApp.tsx: - Isolate AI availability check so failures don't block areas/projects loading or title focus Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/build.rs | 44 ++++---- tdn-desktop/src-tauri/src/commands/ai.rs | 100 ++++++++++++++++-- .../src-tauri/src/commands/ai_resolve.rs | 53 ++++++++-- tdn-desktop/src-tauri/src/commands/mod.rs | 4 +- tdn-desktop/src-tauri/src/vault/manager.rs | 25 ++--- .../components/quick-pane/QuickPaneApp.tsx | 12 ++- 6 files changed, 182 insertions(+), 56 deletions(-) diff --git a/tdn-desktop/src-tauri/build.rs b/tdn-desktop/src-tauri/build.rs index 6ad7bfbd..29908656 100644 --- a/tdn-desktop/src-tauri/build.rs +++ b/tdn-desktop/src-tauri/build.rs @@ -30,16 +30,18 @@ fn build_apple_intelligence_bridge() { let object_path = out_dir.join("apple_intelligence.o"); let static_lib_path = out_dir.join("libapple_intelligence.a"); - let sdk_path = String::from_utf8( - Command::new("xcrun") - .args(["--sdk", "macosx", "--show-sdk-path"]) - .output() - .expect("Failed to locate macOS SDK") - .stdout, - ) - .expect("SDK path is not valid UTF-8") - .trim() - .to_string(); + let sdk_output = Command::new("xcrun") + .args(["--sdk", "macosx", "--show-sdk-path"]) + .output() + .expect("Failed to run xcrun"); + if !sdk_output.status.success() { + let stderr = String::from_utf8_lossy(&sdk_output.stderr); + panic!("xcrun --show-sdk-path failed: {stderr}"); + } + let sdk_path = String::from_utf8(sdk_output.stdout) + .expect("SDK path is not valid UTF-8") + .trim() + .to_string(); // Check if the SDK supports FoundationModels (required for Apple Intelligence) let framework_path = @@ -58,16 +60,18 @@ fn build_apple_intelligence_bridge() { panic!("Source file {source_file} is missing!"); } - let swiftc_path = String::from_utf8( - Command::new("xcrun") - .args(["--find", "swiftc"]) - .output() - .expect("Failed to locate swiftc") - .stdout, - ) - .expect("swiftc path is not valid UTF-8") - .trim() - .to_string(); + let swiftc_output = Command::new("xcrun") + .args(["--find", "swiftc"]) + .output() + .expect("Failed to run xcrun --find swiftc"); + if !swiftc_output.status.success() { + let stderr = String::from_utf8_lossy(&swiftc_output.stderr); + panic!("xcrun --find swiftc failed: {stderr}"); + } + let swiftc_path = String::from_utf8(swiftc_output.stdout) + .expect("swiftc path is not valid UTF-8") + .trim() + .to_string(); let toolchain_swift_lib = Path::new(&swiftc_path) .parent() diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 0dc20c8b..9dd8d6c6 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -151,7 +151,6 @@ pub(crate) fn process_quick_entry_text_sync( } /// Strip markdown code fences from a response (e.g. ```json\n{...}\n```) -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn strip_code_fences(s: &str) -> &str { let trimmed = s.trim(); if let Some(rest) = trimmed.strip_prefix("```") { @@ -166,7 +165,6 @@ fn strip_code_fences(s: &str) -> &str { /// Parse the AI response JSON into a `ParsedQuickEntry`, resolving project/area names to IDs. /// `today` is used for resolving relative date expressions. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn parse_ai_response( response: &str, original_text: &str, @@ -178,11 +176,12 @@ fn parse_ai_response( // Also handles fallback where model returns JSON wrapped in markdown code fences. let clean_response = strip_code_fences(response); if let Ok(parsed) = serde_json::from_str::(clean_response) { - let title = parsed["title"] - .as_str() - .unwrap_or(original_text) - .trim() - .to_string(); + let raw_title = parsed["title"].as_str().unwrap_or("").trim(); + let title = if raw_title.is_empty() { + original_text.trim().to_string() + } else { + raw_title.to_string() + }; let body_from_ai = parsed["body"].as_str().unwrap_or("").trim().to_string(); @@ -253,7 +252,6 @@ fn parse_ai_response( /// Check if two strings are essentially the same (ignoring case, trailing punctuation, whitespace). /// Used to avoid duplicating content when the AI parrots back the input. -#[cfg(all(target_os = "macos", target_arch = "aarch64"))] fn is_essentially_same(a: &str, b: &str) -> bool { if a.is_empty() || b.is_empty() { return a.is_empty() && b.is_empty(); @@ -275,13 +273,34 @@ fn is_essentially_same(a: &str, b: &str) -> bool { pub fn detect_status_from_keywords(input: &str) -> &'static str { let lower = input.to_lowercase(); + // Use word-boundary matching to avoid false positives like "unblocked" + let has_word = |word: &str| { + lower + .find(word) + .map(|pos| { + let before = if pos == 0 { + true + } else { + !lower.as_bytes()[pos - 1].is_ascii_alphanumeric() + }; + let after_pos = pos + word.len(); + let after = if after_pos >= lower.len() { + true + } else { + !lower.as_bytes()[after_pos].is_ascii_alphanumeric() + }; + before && after + }) + .unwrap_or(false) + }; + // Check for blocked — explicit blocking language - if lower.contains("blocked") || lower.contains("waiting on") || lower.contains("waitingon") { + if has_word("blocked") || lower.contains("waiting on") || lower.contains("waitingon") { return "blocked"; } // Check for icebox — only very explicit mentions - if lower.contains("icebox") || lower.contains("ice box") || lower.contains("ice-box") { + if has_word("icebox") || lower.contains("ice box") || lower.contains("ice-box") { return "icebox"; } @@ -409,6 +428,67 @@ mod tests { "in-progress" ); } + + #[test] + fn keyword_no_false_positive_on_unblocked() { + assert_eq!( + detect_status_from_keywords("This task is now unblocked"), + "inbox" + ); + } + + // ── Parsing helper tests ───────────────────────────────────────────── + + #[test] + fn strip_code_fences_plain_json() { + let json = r#"{"title":"Buy milk"}"#; + assert_eq!(strip_code_fences(json), json); + } + + #[test] + fn strip_code_fences_markdown_wrapped() { + let input = "```json\n{\"title\":\"Buy milk\"}\n```"; + assert_eq!(strip_code_fences(input), r#"{"title":"Buy milk"}"#); + } + + #[test] + fn strip_code_fences_no_language_tag() { + let input = "```\n{\"title\":\"Buy milk\"}\n```"; + assert_eq!(strip_code_fences(input), r#"{"title":"Buy milk"}"#); + } + + #[test] + fn is_essentially_same_basic() { + assert!(is_essentially_same("hello", "hello")); + assert!(is_essentially_same("Hello", "hello")); + assert!(is_essentially_same("hello.", "hello")); + assert!(is_essentially_same("hello!", "Hello")); + assert!(!is_essentially_same("hello", "world")); + } + + #[test] + fn is_essentially_same_empty() { + assert!(is_essentially_same("", "")); + assert!(!is_essentially_same("hello", "")); + assert!(!is_essentially_same("", "hello")); + } + + #[test] + fn parse_ai_response_empty_title_falls_back() { + let today = chrono::NaiveDate::from_ymd_opt(2026, 3, 25).unwrap(); + let response = r#"{"title":"","body":"","project":"","area":"","scheduledRef":"","dueRef":"","deferUntilRef":""}"#; + let result = parse_ai_response(response, "Buy milk", &[], &[], today).unwrap(); + assert_eq!(result.title, "Buy milk"); + } + + #[test] + fn parse_ai_response_non_json_fallback() { + let today = chrono::NaiveDate::from_ymd_opt(2026, 3, 25).unwrap(); + let response = "This is not JSON at all"; + let result = parse_ai_response(response, "Buy milk", &[], &[], today).unwrap(); + assert_eq!(result.title, "Buy milk"); + assert_eq!(result.body, "This is not JSON at all"); + } } // ============================================================================= diff --git a/tdn-desktop/src-tauri/src/commands/ai_resolve.rs b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs index d57d50c4..caa61ff4 100644 --- a/tdn-desktop/src-tauri/src/commands/ai_resolve.rs +++ b/tdn-desktop/src-tauri/src/commands/ai_resolve.rs @@ -36,17 +36,17 @@ pub fn resolve_date_expression(expr: &str, today: NaiveDate) -> Option { return Some(trimmed.to_string()); } - // Try custom handlers first for patterns fuzzydate doesn't support - if let Some(date) = resolve_end_of_month(trimmed, today) { + // Preprocess: strip ordinal suffixes and "on"/"by" prefixes + let cleaned = preprocess_date_expr(trimmed); + + // Try custom handlers for patterns fuzzydate doesn't support + if let Some(date) = resolve_end_of_month(&cleaned, today) { return Some(date.format("%Y-%m-%d").to_string()); } - if let Some(date) = resolve_in_n_weeks(trimmed, today) { + if let Some(date) = resolve_in_n_weeks(&cleaned, today) { return Some(date.format("%Y-%m-%d").to_string()); } - // Preprocess: strip ordinal suffixes and "on" prefix that fuzzydate doesn't handle - let cleaned = preprocess_date_expr(trimmed); - // Use fuzzydate to parse the expression relative to today let reference = today.and_hms_opt(12, 0, 0)?; // noon to avoid edge cases match fuzzydate::parse_relative_to(&cleaned, reference) { @@ -153,8 +153,17 @@ fn resolve_in_n_weeks(expr: &str, today: NaiveDate) -> Option { return Some(today + chrono::Duration::days(n)); } if parts[1].starts_with("month") { - // Approximate: 30 days per month - return Some(today + chrono::Duration::days(n * 30)); + // Proper calendar month arithmetic + let new_month = today.month() as i64 + n; + let year_offset = (new_month - 1) / 12; + let month = ((new_month - 1) % 12 + 1) as u32; + let year = today.year() + year_offset as i32; + // Clamp day to month end (e.g. Jan 31 + 1 month → Feb 28) + let max_day = last_day_of_month(year, month) + .map(|d| d.day()) + .unwrap_or(28); + let day = today.day().min(max_day); + return NaiveDate::from_ymd_opt(year, month, day); } } } @@ -334,6 +343,34 @@ mod tests { assert_eq!(resolve_date_expression("banana", test_date()), None); } + #[test] + fn date_in_one_month() { + // March 25 + 1 month = April 25 (proper calendar arithmetic, not 30 days) + assert_eq!( + resolve_date_expression("in 1 month", test_date()), + Some("2026-04-25".into()) + ); + } + + #[test] + fn date_in_month_clamps_to_month_end() { + // Jan 31 + 1 month should be Feb 28 (not March 3) + let jan31 = NaiveDate::from_ymd_opt(2026, 1, 31).unwrap(); + assert_eq!( + resolve_date_expression("in 1 month", jan31), + Some("2026-02-28".into()) + ); + } + + #[test] + fn date_by_end_of_march() { + // "by end of March" — preprocessing strips "by", then resolves "end of March" + assert_eq!( + resolve_date_expression("by end of March", test_date()), + Some("2026-03-31".into()) + ); + } + // ── Project fuzzy matching tests ───────────────────────────────────── fn test_projects() -> Vec { diff --git a/tdn-desktop/src-tauri/src/commands/mod.rs b/tdn-desktop/src-tauri/src/commands/mod.rs index b9f831ef..bc0bee13 100644 --- a/tdn-desktop/src-tauri/src/commands/mod.rs +++ b/tdn-desktop/src-tauri/src/commands/mod.rs @@ -4,8 +4,8 @@ //! Import specific commands via their submodule (e.g., `commands::preferences::greet`). pub mod ai; -pub mod ai_prompts; -pub mod ai_resolve; +pub(crate) mod ai_prompts; +pub(crate) mod ai_resolve; pub mod config; pub mod notifications; pub mod preferences; diff --git a/tdn-desktop/src-tauri/src/vault/manager.rs b/tdn-desktop/src-tauri/src/vault/manager.rs index b2ceaf18..a851e68b 100644 --- a/tdn-desktop/src-tauri/src/vault/manager.rs +++ b/tdn-desktop/src-tauri/src/vault/manager.rs @@ -508,21 +508,22 @@ impl VaultManager { let task = self.get_task(&update.id)?; - // Resolve project/area IDs to titles for wikilinks + // Resolve project/area IDs to titles for wikilinks (single lock) let mut update = update; - if let Some(ref value) = update.project { - if !value.is_empty() { - let inner = self.inner.read(); - if let Some(project) = inner.index.get_project(value) { - update.project = Some(project.title.clone()); + { + let inner = self.inner.read(); + if let Some(ref value) = update.project { + if !value.is_empty() { + if let Some(project) = inner.index.get_project(value) { + update.project = Some(project.title.clone()); + } } } - } - if let Some(ref value) = update.area { - if !value.is_empty() { - let inner = self.inner.read(); - if let Some(area) = inner.index.get_area(value) { - update.area = Some(area.title.clone()); + if let Some(ref value) = update.area { + if !value.is_empty() { + if let Some(area) = inner.index.get_area(value) { + update.area = Some(area.title.clone()); + } } } } diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 3a68404b..83753fed 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -368,11 +368,10 @@ export default function QuickPaneApp() { // Reset form on focus (fresh start) resetForm() - // Load areas, projects, and check AI availability - const [areasResult, projectsResult, aiResult] = await Promise.all([ + // Load areas and projects (required for the pane to work) + const [areasResult, projectsResult] = await Promise.all([ commands.listAreas(), commands.listProjects(), - commands.checkAppleIntelligenceAvailable(), ]) if (areasResult.status === 'ok') { @@ -382,7 +381,12 @@ export default function QuickPaneApp() { setProjects(projectsResult.data) } - setAiAvailable(aiResult) + // Check AI availability separately so failures don't block init + try { + setAiAvailable(await commands.checkAppleIntelligenceAvailable()) + } catch { + setAiAvailable(false) + } // Focus title input setTimeout(() => titleRef.current?.focus(), FOCUS_DELAY_MS) From b37e5c8b1688e0aa22265ba512852c60cd6050fb Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 15:42:29 +0000 Subject: [PATCH 31/32] Address remaining code review findings ai.rs: - Move content-bearing logs from INFO to DEBUG (only visible in dev) - Use captured-identifier format style per project guidelines - Remove useCallback wrapper (React Compiler handles memoization) apple_intelligence.swift: - Add 30-second timeout to DispatchSemaphore.wait() to prevent indefinite hangs if Foundation Models fails - Remove stale contentTagging comment vault.rs: - Add 3 unit tests for ID-to-title resolution in create_task and update_task (valid ID resolves, unknown ID preserved) QuickPaneApp.tsx: - Remove useCallback from handleProcessWithAI Co-Authored-By: Claude Opus 4.6 (1M context) --- tdn-desktop/src-tauri/src/commands/ai.rs | 37 +++--- tdn-desktop/src-tauri/src/commands/vault.rs | 113 ++++++++++++++++++ .../src-tauri/swift/apple_intelligence.swift | 9 +- .../components/quick-pane/QuickPaneApp.tsx | 4 +- 4 files changed, 137 insertions(+), 26 deletions(-) diff --git a/tdn-desktop/src-tauri/src/commands/ai.rs b/tdn-desktop/src-tauri/src/commands/ai.rs index 9dd8d6c6..0bced2a5 100644 --- a/tdn-desktop/src-tauri/src/commands/ai.rs +++ b/tdn-desktop/src-tauri/src/commands/ai.rs @@ -108,13 +108,14 @@ pub(crate) fn process_quick_entry_text_sync( &day_of_week, ); - log::info!("── AI Quick Entry ──────────────────────────────────"); - log::info!("Input: {trimmed:?}"); + log::info!("AI Quick Entry: processing input"); + log::debug!("── AI Quick Entry ──────────────────────────────────"); + log::debug!("Input: {trimmed:?}"); log::debug!("System prompt:\n{system_prompt}"); let response = crate::apple_intelligence::process_text(&system_prompt, trimmed, 0)?; - log::info!("Raw response: {response}"); + log::debug!("Raw response: {response}"); let mut result = parse_ai_response(&response, trimmed, projects, areas, today.date_naive())?; @@ -122,23 +123,16 @@ pub(crate) fn process_quick_entry_text_sync( // Determine status via keyword detection (not LLM) result.status = detect_status_from_keywords(trimmed).to_string(); - log::info!("Mapped result:"); - log::info!(" title: {:?}", result.title); - log::info!( - " body: {:?}", - if result.body.is_empty() { - "(empty)" - } else { - &result.body - } - ); - log::info!(" status: {:?}", result.status); - log::info!(" due: {:?}", result.due); - log::info!(" scheduled: {:?}", result.scheduled); - log::info!(" defer: {:?}", result.defer_until); - log::info!(" project: {:?}", result.project_id); - log::info!(" area: {:?}", result.area_id); - log::info!("────────────────────────────────────────────────────"); + log::debug!("Mapped result:"); + log::debug!(" title: {:?}", result.title); + log::debug!(" status: {:?}", result.status); + log::debug!(" due: {:?}", result.due); + log::debug!(" scheduled: {:?}", result.scheduled); + log::debug!(" defer: {:?}", result.defer_until); + log::debug!(" project: {:?}", result.project_id); + log::debug!(" area: {:?}", result.area_id); + log::debug!("────────────────────────────────────────────────────"); + log::info!("AI Quick Entry: complete"); Ok(result) } @@ -200,7 +194,8 @@ fn parse_ai_response( if body_from_ai.is_empty() || is_essentially_same(&body_from_ai, original_text.trim()) { original_text.trim().to_string() } else { - format!("{}\n\n{}", original_text.trim(), body_from_ai) + let original_trimmed = original_text.trim(); + format!("{original_trimmed}\n\n{body_from_ai}") } }; diff --git a/tdn-desktop/src-tauri/src/commands/vault.rs b/tdn-desktop/src-tauri/src/commands/vault.rs index 4ccaec3a..b3f5327b 100644 --- a/tdn-desktop/src-tauri/src/commands/vault.rs +++ b/tdn-desktop/src-tauri/src/commands/vault.rs @@ -978,4 +978,117 @@ updated-at: 2025-01-15 assert_eq!(manager.list_projects().unwrap().len(), 2); assert!(manager.list_areas().unwrap().is_empty()); } + + // ------------------------------------------------------------------------- + // ID-to-Title Resolution Tests + // ------------------------------------------------------------------------- + + #[test] + fn create_task_resolves_project_id_to_title() { + let temp_dir = create_test_vault(); + let manager = create_test_manager(&temp_dir); + + // Create a project first + let project = manager + .create_project(CreateProjectOptions { + title: "My Project".to_string(), + status: None, + area_id: None, + start_date: None, + end_date: None, + description: None, + }) + .unwrap(); + + // Create a task using the project's hash ID + let task = manager + .create_task(CreateTaskOptions { + title: Some("Test Task".to_string()), + project_id: Some(project.id.clone()), + ..Default::default() + }) + .unwrap(); + + // The wikilink should contain the title, not the hash ID + assert!(task.project.is_some()); + let project_ref = task.project.unwrap(); + assert!( + project_ref.contains("My Project"), + "Expected wikilink with title, got: {project_ref}" + ); + assert!( + !project_ref.contains(&project.id), + "Wikilink should not contain hash ID" + ); + } + + #[test] + fn create_task_preserves_unknown_project_id() { + let temp_dir = create_test_vault(); + let manager = create_test_manager(&temp_dir); + + // Use an ID that doesn't match any project — should be preserved as-is + let task = manager + .create_task(CreateTaskOptions { + title: Some("Test Task".to_string()), + project_id: Some("nonexistent-id".to_string()), + ..Default::default() + }) + .unwrap(); + + // The original string is preserved since it didn't resolve + assert!(task.project.is_some()); + let project_ref = task.project.unwrap(); + assert!( + project_ref.contains("nonexistent-id"), + "Unknown ID should be preserved: {project_ref}" + ); + } + + #[test] + fn update_task_resolves_project_id_to_title() { + let temp_dir = create_test_vault(); + let manager = create_test_manager(&temp_dir); + + // Create project and task + let project = manager + .create_project(CreateProjectOptions { + title: "Update Project".to_string(), + status: None, + area_id: None, + start_date: None, + end_date: None, + description: None, + }) + .unwrap(); + + let task = manager + .create_task(CreateTaskOptions { + title: Some("Task".to_string()), + ..Default::default() + }) + .unwrap(); + + // Update task with project hash ID + let updated = manager + .update_task(TaskUpdate { + id: task.id, + project: Some(project.id.clone()), + title: None, + status: None, + area: None, + scheduled: None, + due: None, + defer_until: None, + body: None, + }) + .unwrap(); + + assert!(updated.project.is_some()); + let project_ref = updated.project.unwrap(); + assert!( + project_ref.contains("Update Project"), + "Expected resolved title, got: {project_ref}" + ); + } } diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index bab5d558..acb8dd96 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -110,7 +110,6 @@ public func processTextWithSystemPrompt( return responsePtr } - // Use contentTagging adapter — optimized for extraction and classification tasks let model = SystemLanguageModel.default guard model.availability == .available else { responsePtr.pointee.error_message = duplicateCString( @@ -153,9 +152,13 @@ public func processTextWithSystemPrompt( } } - semaphore.wait() + let timeout = semaphore.wait(timeout: .now() + 30.0) - if let response = box.response { + if timeout == .timedOut { + responsePtr.pointee.error_message = duplicateCString( + "Apple Intelligence timed out after 30 seconds." + ) + } else if let response = box.response { responsePtr.pointee.response = duplicateCString(response) responsePtr.pointee.success = 1 } else { diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index 83753fed..f1c3fadf 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -256,7 +256,7 @@ export default function QuickPaneApp() { // AI Processing Handler // ───────────────────────────────────────────────────────────────────────── - const handleProcessWithAI = React.useCallback(async () => { + const handleProcessWithAI = async () => { const trimmedTitle = title.trim() if (!trimmedTitle || isProcessingAI) return @@ -335,7 +335,7 @@ export default function QuickPaneApp() { } setIsProcessingAI(false) - }, [title, projects, areas, isProcessingAI]) + } // ───────────────────────────────────────────────────────────────────────── // Theme Sync From 3ca0edae362fee728e95577f6062ceb1e01ecf6d Mon Sep 17 00:00:00 2001 From: Danny Smith Date: Thu, 26 Mar 2026 16:38:22 +0000 Subject: [PATCH 32/32] Address second round of code review findings apple_intelligence.swift: - Replace manual JSON escaping with JSONSerialization for safe output - Cancel the detached Task on semaphore timeout (was left running) - Mark maxTokens as unused (_maxTokens) for ABI compatibility - Remove stale contentTagging comment QuickPaneApp.tsx: - Add session token (aiSessionRef) to ignore late AI completions when pane is dismissed and reopened during processing - Clear ALL fields on AI result (not just truthy ones) so re-processing doesn't leave stale values from a previous run Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src-tauri/swift/apple_intelligence.swift | 37 ++++++++----------- .../swift/apple_intelligence_stub.swift | 2 +- .../components/quick-pane/QuickPaneApp.tsx | 26 +++++++------ 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence.swift b/tdn-desktop/src-tauri/swift/apple_intelligence.swift index acb8dd96..f188c9ec 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence.swift @@ -51,28 +51,22 @@ private func stripInvisibleChars(_ text: String) -> String { @available(macOS 26.0, *) private func parsedTaskToJSON(_ task: ParsedTask) -> String { - // Build JSON manually to avoid Codable complexity with @Generable - let fields: [(String, String)] = [ - ("title", task.title), - ("body", task.body), - ("project", task.project), - ("area", task.area), - ("scheduledRef", task.scheduledRef), - ("dueRef", task.dueRef), - ("deferUntilRef", task.deferUntilRef), + let dict: [String: String] = [ + "title": task.title, + "body": task.body, + "project": task.project, + "area": task.area, + "scheduledRef": task.scheduledRef, + "dueRef": task.dueRef, + "deferUntilRef": task.deferUntilRef, ] - let pairs = fields.map { (key, value) in - let escaped = value - .replacingOccurrences(of: "\\", with: "\\\\") - .replacingOccurrences(of: "\"", with: "\\\"") - .replacingOccurrences(of: "\n", with: "\\n") - .replacingOccurrences(of: "\r", with: "\\r") - .replacingOccurrences(of: "\t", with: "\\t") - return "\"\(key)\":\"\(escaped)\"" + guard let data = try? JSONSerialization.data(withJSONObject: dict, options: []), + let json = String(data: data, encoding: .utf8) else { + // Fallback: return minimal valid JSON on serialization failure + return "{\"title\":\"\",\"body\":\"\",\"project\":\"\",\"area\":\"\",\"scheduledRef\":\"\",\"dueRef\":\"\",\"deferUntilRef\":\"\"}" } - - return "{\(pairs.joined(separator: ","))}" + return json } // MARK: - Public C-callable functions @@ -96,7 +90,7 @@ public func isAppleIntelligenceAvailable() -> Int32 { public func processTextWithSystemPrompt( _ systemPrompt: UnsafePointer, _ userContent: UnsafePointer, - maxTokens: Int32 + _maxTokens: Int32 // unused, kept for ABI compatibility ) -> UnsafeMutablePointer { let swiftSystemPrompt = String(cString: systemPrompt) let swiftUserContent = String(cString: userContent) @@ -126,7 +120,7 @@ public func processTextWithSystemPrompt( } let box = ResultBox() - Task.detached(priority: .userInitiated) { + let task = Task.detached(priority: .userInitiated) { defer { semaphore.signal() } do { let session = LanguageModelSession( @@ -155,6 +149,7 @@ public func processTextWithSystemPrompt( let timeout = semaphore.wait(timeout: .now() + 30.0) if timeout == .timedOut { + task.cancel() responsePtr.pointee.error_message = duplicateCString( "Apple Intelligence timed out after 30 seconds." ) diff --git a/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift b/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift index ecfee351..d1b49ab6 100644 --- a/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift +++ b/tdn-desktop/src-tauri/swift/apple_intelligence_stub.swift @@ -15,7 +15,7 @@ public func isAppleIntelligenceAvailable() -> Int32 { public func processTextWithSystemPrompt( _ systemPrompt: UnsafePointer, _ userContent: UnsafePointer, - maxTokens: Int32 + _maxTokens: Int32 ) -> UnsafeMutablePointer { let responsePtr = ResponsePointer.allocate(capacity: 1) responsePtr.initialize(to: AppleLLMResponse(response: nil, success: 0, error_message: nil)) diff --git a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx index f1c3fadf..a3a217c1 100644 --- a/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx +++ b/tdn-desktop/src/components/quick-pane/QuickPaneApp.tsx @@ -129,6 +129,7 @@ export default function QuickPaneApp() { const titleRef = React.useRef(null) const bodyRef = React.useRef(null) + const aiSessionRef = React.useRef(0) // ───────────────────────────────────────────────────────────────────────── // Reset Form @@ -260,6 +261,8 @@ export default function QuickPaneApp() { const trimmedTitle = title.trim() if (!trimmedTitle || isProcessingAI) return + // Session token to ignore late completions (e.g. if pane was dismissed and reopened) + const sessionId = ++aiSessionRef.current setIsProcessingAI(true) try { @@ -279,6 +282,9 @@ export default function QuickPaneApp() { areaPairs ) + // Ignore late result if a new session started or pane was reset + if (aiSessionRef.current !== sessionId) return + if (result.status === 'error') { logger.warn('AI processing failed', { error: result.error }) setIsProcessingAI(false) @@ -287,13 +293,15 @@ export default function QuickPaneApp() { const parsed = result.data - // Populate form fields from AI result + // Populate ALL form fields from AI result (clear fields not set by AI) setTitle(parsed.title) - - if (parsed.body) { - setBody(parsed.body) - setShowBody(true) - } + setBody(parsed.body || '') + setShowBody(!!parsed.body) + setDue(parsed.due ?? null) + setScheduled(parsed.scheduled ?? null) + setDeferUntil(parsed.deferUntil ?? null) + setProjectId(parsed.projectId ?? null) + setAreaId(parsed.areaId ?? null) // Set status from keyword detection (Rust handles this, not the LLM) const validStatuses: TaskStatus[] = [ @@ -307,12 +315,6 @@ export default function QuickPaneApp() { setStatus(parsed.status as TaskStatus) } - if (parsed.due) setDue(parsed.due) - if (parsed.scheduled) setScheduled(parsed.scheduled) - if (parsed.deferUntil) setDeferUntil(parsed.deferUntil) - if (parsed.projectId) setProjectId(parsed.projectId) - if (parsed.areaId) setAreaId(parsed.areaId) - // Auto-ready Rule 2 (AI only): if scheduled within 7 days and status // is still inbox (keyword detection didn't override), promote to ready. // Compare date strings (YYYY-MM-DD) to avoid time-of-day issues.