diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift index ff29a1eb..5384b9cf 100644 --- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift +++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift @@ -381,6 +381,14 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { // Seed for the reuse path is sampled at the end of this decodePrompt; apply // the word-continuation constraint to it just like the fresh path does. engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation) + // Per-token log-probabilities cost two O(vocab) passes each in the engine; + // only compute them when the confidence gate would actually read them. + // Re-assert per request: the floor is not part of the sampling fingerprint, + // so a reused sequence must not carry a stale flag. + engine.setComputeLogprob( + autocompleteSequenceID, + options.confidenceFloor > -.infinity + ) var mutableRemaining = remaining let status = engine.decodePrompt( autocompleteSequenceID, @@ -420,6 +428,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { // The engine samples the first (seed) token at the end of decodePrompt, so set the // word-continuation constraint here, before decoding. engine.setForceWordContinuation(seqID, options.forceWordContinuation) + // Skip the engine's per-token log-probability work (two O(vocab) passes per token) + // whenever confidence suppression is disabled — the shipping default — since the value + // would be summed and then discarded. + engine.setComputeLogprob(seqID, options.confidenceFloor > -.infinity) var tokens = promptTokens let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0) diff --git a/Cotabby/Support/AXHelper.swift b/Cotabby/Support/AXHelper.swift index 773cdfb2..ef740234 100644 --- a/Cotabby/Support/AXHelper.swift +++ b/Cotabby/Support/AXHelper.swift @@ -774,8 +774,33 @@ enum AXHelper { return flipped } + /// Cached display list. 
Display configuration changes are rare (plug/unplug, resolution or + /// arrangement changes), but `cocoaRect`/`validatedCocoaTextRect` run for every AX rect at the + /// focus-poll cadence — rebuilding `NSScreen.screens` + `CGDisplayBounds` per conversion + /// multiplied AppKit/CoreGraphics traffic by the resolve rate for identical results. All AX + /// geometry work happens on the main thread, so unsynchronized statics are safe here. + private static var cachedDisplayGeometries: [DisplayGeometry]? + + /// Invalidation hook for the cache above. macOS posts `didChangeScreenParameters` for every + /// event that can alter the display list (connect/disconnect, resolution, arrangement, Dock + /// and menu-bar resizes affecting `visibleFrame`). Lazily installed via the first + /// `displayGeometries()` call, so the observer always exists before a cached value could go + /// stale. + private static let displayChangeObserver: NSObjectProtocol = NotificationCenter.default.addObserver( + forName: NSApplication.didChangeScreenParametersNotification, + object: nil, + queue: .main + ) { _ in + cachedDisplayGeometries = nil + } + private static func displayGeometries() -> [DisplayGeometry] { - NSScreen.screens.compactMap { screen in + _ = displayChangeObserver + if let cachedDisplayGeometries { + return cachedDisplayGeometries + } + + let geometries = NSScreen.screens.compactMap { screen -> DisplayGeometry? in guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else { @@ -790,6 +815,8 @@ enum AXHelper { backingScaleFactor: screen.backingScaleFactor ) } + cachedDisplayGeometries = geometries + return geometries } /// Last-resort fallback for unusual virtual displays where AppKit cannot expose a display ID. 
diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift index 03707d36..eafd1422 100644 --- a/Cotabby/Support/SuggestionTextNormalizer.swift +++ b/Cotabby/Support/SuggestionTextNormalizer.swift @@ -188,6 +188,13 @@ enum SuggestionTextNormalizer { /// Removes `<think>` reasoning blocks: complete blocks first, then any dangling open /// tag left when generation hit the token limit before the block was closed. private static func stripThinkBlocks(_ text: String) -> String { + // Both patterns below require a literal `<think>`, so this cheap scan lets the common case + // (no reasoning block — the vast majority of completions) skip regex work entirely. + // `String.range(of:options:.regularExpression)` compiles its pattern on every call, and + // this runs on the per-prediction critical path. + guard text.contains("<think>") else { + return text + } var result = text if let complete = result.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) { result.replaceSubrange(complete, with: "") @@ -269,12 +276,16 @@ enum SuggestionTextNormalizer { "App:" ] + /// `scaffoldingLabels` ordered longest-first, computed once. The ordering is what makes + /// "Text before the caret:" win over a shorter sibling; sorting on every call repeated that + /// work on the per-prediction critical path for an identical result. + private static let labelsByLengthDescending: [String] = scaffoldingLabels.sorted { $0.count > $1.count } + /// Removes a leading run of known prompt-scaffolding labels (see `scaffoldingLabels`), whether /// each sits on its own line or inline before the continuation. Only labels at the very start /// are stripped; a label appearing later in the text is left alone because by then it is far /// more likely to be real user content than echoed scaffolding. 
private static func stripLeadingScaffoldingLabels(_ text: String) -> String { - let labelsByLengthDescending = scaffoldingLabels.sorted { $0.count > $1.count } var working = text while true {