diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
index ff29a1eb..5384b9cf 100644
--- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
+++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -381,6 +381,14 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
                         // Seed for the reuse path is sampled at the end of this decodePrompt; apply
                         // the word-continuation constraint to it just like the fresh path does.
                         engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation)
+                        // Per-token log-probabilities cost two O(vocab) passes each in the engine;
+                        // only compute them when the confidence gate would actually read them.
+                        // Re-assert per request: the floor is not part of the sampling fingerprint,
+                        // so a reused sequence must not carry a stale flag.
+                        engine.setComputeLogprob(
+                            autocompleteSequenceID,
+                            options.confidenceFloor > -.infinity
+                        )
                         var mutableRemaining = remaining
                         let status = engine.decodePrompt(
                             autocompleteSequenceID,
@@ -420,6 +428,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
         // The engine samples the first (seed) token at the end of decodePrompt, so set the
         // word-continuation constraint here, before decoding.
         engine.setForceWordContinuation(seqID, options.forceWordContinuation)
+        // Skip the engine's per-token log-probability work (two O(vocab) passes per token)
+        // whenever confidence suppression is disabled — the shipping default — since the value
+        // would be summed and then discarded.
+        engine.setComputeLogprob(seqID, options.confidenceFloor > -.infinity)
 
         var tokens = promptTokens
         let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0)
diff --git a/Cotabby/Support/AXHelper.swift b/Cotabby/Support/AXHelper.swift
index 773cdfb2..ef740234 100644
--- a/Cotabby/Support/AXHelper.swift
+++ b/Cotabby/Support/AXHelper.swift
@@ -774,8 +774,33 @@ enum AXHelper {
         return flipped
     }
 
+    /// Memoized result of `displayGeometries()`; `nil` means the list must be rebuilt. Display
+    /// configuration rarely changes (plug/unplug, resolution, arrangement), yet conversions such
+    /// as `cocoaRect`/`validatedCocoaTextRect` run per AX rect at the focus-poll cadence, so
+    /// re-querying `NSScreen.screens` and `CGDisplayBounds` each time repeated identical work.
+    /// NOTE(review): left unsynchronized on the premise that AX geometry is main-thread only — confirm.
+    private static var cachedDisplayGeometries: [DisplayGeometry]? = nil
+
+    /// Observer token whose handler resets `cachedDisplayGeometries` to `nil` whenever
+    /// `NSApplication.didChangeScreenParametersNotification` is posted; the handler is scheduled
+    /// on the main queue. macOS is expected to post it for display connect/disconnect,
+    /// resolution, arrangement, and Dock/menu-bar changes affecting `visibleFrame`. Installed
+    /// lazily by the first `displayGeometries()` call, i.e. before any cached value can go stale.
+    private static let displayChangeObserver: NSObjectProtocol = {
+        let center = NotificationCenter.default
+        let name = NSApplication.didChangeScreenParametersNotification
+        return center.addObserver(forName: name, object: nil, queue: .main) { _ in
+            cachedDisplayGeometries = nil
+        }
+    }()
+
     private static func displayGeometries() -> [DisplayGeometry] {
-        NSScreen.screens.compactMap { screen in
+        _ = displayChangeObserver
+        if let cachedDisplayGeometries {
+            return cachedDisplayGeometries
+        }
+
+        let geometries = NSScreen.screens.compactMap { screen -> DisplayGeometry? in
             guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")]
                 as? NSNumber
             else {
@@ -790,6 +815,8 @@ enum AXHelper {
                 backingScaleFactor: screen.backingScaleFactor
             )
         }
+        cachedDisplayGeometries = geometries
+        return geometries
     }
 
     /// Last-resort fallback for unusual virtual displays where AppKit cannot expose a display ID.
diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift
index 03707d36..eafd1422 100644
--- a/Cotabby/Support/SuggestionTextNormalizer.swift
+++ b/Cotabby/Support/SuggestionTextNormalizer.swift
@@ -188,6 +188,13 @@ enum SuggestionTextNormalizer {
     /// Removes `<think>…</think>` reasoning blocks: complete blocks first, then any dangling open
     /// tag left when generation hit the token limit before the block was closed.
     private static func stripThinkBlocks(_ text: String) -> String {
+        // Both patterns below require a literal `<think>`, so this cheap scan lets the common case
+        // (no reasoning block — the vast majority of completions) skip regex work entirely.
+        // `String.range(of:options:.regularExpression)` compiles its pattern on every call, and
+        // this runs on the per-prediction critical path.
+        guard text.contains("<think>") else {
+            return text
+        }
         var result = text
         if let complete = result.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) {
             result.replaceSubrange(complete, with: "")
@@ -269,12 +276,16 @@ enum SuggestionTextNormalizer {
         "App:"
     ]
 
+    /// Longest-first copy of `scaffoldingLabels`, sorted once so a longer label is tried before any
+    /// shorter one; kept out of `stripLeadingScaffoldingLabels` to avoid re-sorting per prediction.
+    private static let labelsByLengthDescending: [String] =
+        scaffoldingLabels.sorted { lhs, rhs in lhs.count > rhs.count }
+
     /// Removes a leading run of known prompt-scaffolding labels (see `scaffoldingLabels`), whether
     /// each sits on its own line or inline before the continuation. Only labels at the very start
     /// are stripped; a label appearing later in the text is left alone because by then it is far
     /// more likely to be real user content than echoed scaffolding.
     private static func stripLeadingScaffoldingLabels(_ text: String) -> String {
-        let labelsByLengthDescending = scaffoldingLabels.sorted { $0.count > $1.count }
         var working = text
 
         while true {