diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
index ff29a1eb..5384b9cf 100644
--- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
+++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -381,6 +381,14 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
// Seed for the reuse path is sampled at the end of this decodePrompt; apply
// the word-continuation constraint to it just like the fresh path does.
engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation)
+ // Per-token log-probabilities cost two O(vocab) passes each in the engine;
+ // only compute them when the confidence gate would actually read them.
+ // Re-assert per request: the floor is not part of the sampling fingerprint,
+ // so a reused sequence must not carry a stale flag.
+ engine.setComputeLogprob(
+ autocompleteSequenceID,
+ options.confidenceFloor > -.infinity
+ )
var mutableRemaining = remaining
let status = engine.decodePrompt(
autocompleteSequenceID,
@@ -420,6 +428,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
// The engine samples the first (seed) token at the end of decodePrompt, so set the
// word-continuation constraint here, before decoding.
engine.setForceWordContinuation(seqID, options.forceWordContinuation)
+ // Skip the engine's per-token log-probability work (two O(vocab) passes per token)
+ // whenever confidence suppression is disabled — the shipping default — since the value
+ // would be summed and then discarded.
+ engine.setComputeLogprob(seqID, options.confidenceFloor > -.infinity)
var tokens = promptTokens
let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0)
diff --git a/Cotabby/Support/AXHelper.swift b/Cotabby/Support/AXHelper.swift
index 773cdfb2..ef740234 100644
--- a/Cotabby/Support/AXHelper.swift
+++ b/Cotabby/Support/AXHelper.swift
@@ -774,8 +774,33 @@ enum AXHelper {
return flipped
}
+ /// Cached display list; `nil` means "not built yet" or "invalidated by a display change".
+ /// Display configuration changes are rare (plug/unplug, resolution or arrangement changes),
+ /// but `cocoaRect`/`validatedCocoaTextRect` run for every AX rect at the focus-poll cadence, so
+ /// rebuilding `NSScreen.screens` + `CGDisplayBounds` per conversion repeated identical work.
+ /// Unsynchronized by design: this relies on all AX geometry work staying on the main thread.
+ private static var cachedDisplayGeometries: [DisplayGeometry]?
+
+ /// Invalidation hook for `cachedDisplayGeometries`: clears it whenever AppKit posts
+ /// `didChangeScreenParametersNotification` (display connect/disconnect, resolution, arrangement).
+ /// NOTE(review): Dock/menu-bar resizes that only move `visibleFrame` are assumed to post it too —
+ /// confirm. A `static let` initializes on first access, and `displayGeometries()` touches it
+ /// before reading the cache, so the observer exists before a cached value could go stale.
+ private static let displayChangeObserver: NSObjectProtocol = NotificationCenter.default.addObserver(
+ forName: NSApplication.didChangeScreenParametersNotification,
+ object: nil,
+ queue: .main
+ ) { _ in
+ cachedDisplayGeometries = nil
+ }
+
private static func displayGeometries() -> [DisplayGeometry] {
- NSScreen.screens.compactMap { screen in
+ _ = displayChangeObserver
+ if let cachedDisplayGeometries {
+ return cachedDisplayGeometries
+ }
+
+ let geometries = NSScreen.screens.compactMap { screen -> DisplayGeometry? in
guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")]
as? NSNumber
else {
@@ -790,6 +815,8 @@ enum AXHelper {
backingScaleFactor: screen.backingScaleFactor
)
}
+ cachedDisplayGeometries = geometries
+ return geometries
}
/// Last-resort fallback for unusual virtual displays where AppKit cannot expose a display ID.
diff --git a/Cotabby/Support/SuggestionTextNormalizer.swift b/Cotabby/Support/SuggestionTextNormalizer.swift
index 03707d36..eafd1422 100644
--- a/Cotabby/Support/SuggestionTextNormalizer.swift
+++ b/Cotabby/Support/SuggestionTextNormalizer.swift
@@ -188,6 +188,13 @@ enum SuggestionTextNormalizer {
/// Removes `<think>…</think>` reasoning blocks: complete blocks first, then any dangling open
/// tag left when generation hit the token limit before the block was closed.
private static func stripThinkBlocks(_ text: String) -> String {
+ // Both patterns below require a literal `<think>`, so this cheap scan lets the common case
+ // (no reasoning block — the vast majority of completions) skip regex work entirely.
+ // `String.range(of:options:.regularExpression)` compiles its pattern on every call, and
+ // this runs on the per-prediction critical path.
+ guard text.contains("<think>") else {
+ return text
+ }
var result = text
if let complete = result.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) {
result.replaceSubrange(complete, with: "")
@@ -269,12 +276,16 @@ enum SuggestionTextNormalizer {
"App:"
]
+ /// `scaffoldingLabels` ordered longest-first (by `Character` count). As a `static let` it is
+ /// built once, on first access, instead of re-sorting per call on the per-prediction critical
+ /// path. Longest-first is what makes "Text before the caret:" win over a shorter sibling.
+ private static let labelsByLengthDescending: [String] = scaffoldingLabels.sorted { $0.count > $1.count }
+
/// Removes a leading run of known prompt-scaffolding labels (see `scaffoldingLabels`), whether
/// each sits on its own line or inline before the continuation. Only labels at the very start
/// are stripped; a label appearing later in the text is left alone because by then it is far
/// more likely to be real user content than echoed scaffolding.
private static func stripLeadingScaffoldingLabels(_ text: String) -> String {
- let labelsByLengthDescending = scaffoldingLabels.sorted { $0.count > $1.count }
var working = text
while true {