Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Cotabby/Services/Runtime/LlamaRuntimeCore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,14 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
// Seed for the reuse path is sampled at the end of this decodePrompt; apply
// the word-continuation constraint to it just like the fresh path does.
engine.setForceWordContinuation(autocompleteSequenceID, options.forceWordContinuation)
// Per-token log-probabilities cost two O(vocab) passes each in the engine;
// only compute them when the confidence gate would actually read them.
// Re-assert per request: the floor is not part of the sampling fingerprint,
// so a reused sequence must not carry a stale flag.
engine.setComputeLogprob(
autocompleteSequenceID,
options.confidenceFloor > -.infinity
)
var mutableRemaining = remaining
let status = engine.decodePrompt(
autocompleteSequenceID,
Expand Down Expand Up @@ -420,6 +428,10 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
// The engine samples the first (seed) token at the end of decodePrompt, so set the
// word-continuation constraint here, before decoding.
engine.setForceWordContinuation(seqID, options.forceWordContinuation)
// Skip the engine's per-token log-probability work (two O(vocab) passes per token)
// whenever confidence suppression is disabled — the shipping default — since the value
// would be summed and then discarded.
engine.setComputeLogprob(seqID, options.confidenceFloor > -.infinity)

var tokens = promptTokens
let status = engine.decodePrompt(seqID, &tokens, Int32(tokens.count), 0)
Expand Down
29 changes: 28 additions & 1 deletion Cotabby/Support/AXHelper.swift
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,33 @@ enum AXHelper {
return flipped
}

/// Memoized display list; `nil` means it has not been built yet or has been invalidated.
///
/// The display configuration changes rarely (plug/unplug, resolution or arrangement changes),
/// whereas `cocoaRect`/`validatedCocoaTextRect` need it for every AX rect at the focus-poll
/// cadence. Keeping the last result avoids re-querying `NSScreen.screens` and
/// `CGDisplayBounds` on every conversion only to produce an identical answer.
///
/// Intentionally unsynchronized: this relies on all AX geometry work running on the main thread.
private static var cachedDisplayGeometries: [DisplayGeometry]?

/// Observer token whose only job is to drop `cachedDisplayGeometries` when the screen setup
/// changes. macOS posts `didChangeScreenParameters` for every event that can alter the display
/// list (connect/disconnect, resolution, arrangement, Dock and menu-bar resizes affecting
/// `visibleFrame`).
///
/// Being a `static let`, the registration happens lazily on first access. `displayGeometries()`
/// touches this property before it consults the cache, so the observer is always in place
/// before a cached value could become stale. The block is delivered on the main queue.
private static let displayChangeObserver: NSObjectProtocol = {
    let center = NotificationCenter.default
    return center.addObserver(
        forName: NSApplication.didChangeScreenParametersNotification,
        object: nil,
        queue: OperationQueue.main,
        using: { _ in
            // Invalidate only; the next `displayGeometries()` call rebuilds the list.
            cachedDisplayGeometries = nil
        }
    )
}()

private static func displayGeometries() -> [DisplayGeometry] {
NSScreen.screens.compactMap { screen in
_ = displayChangeObserver
if let cachedDisplayGeometries {
return cachedDisplayGeometries
}

let geometries = NSScreen.screens.compactMap { screen -> DisplayGeometry? in
guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")]
as? NSNumber
else {
Expand All @@ -790,6 +815,8 @@ enum AXHelper {
backingScaleFactor: screen.backingScaleFactor
)
}
cachedDisplayGeometries = geometries
return geometries
}

/// Last-resort fallback for unusual virtual displays where AppKit cannot expose a display ID.
Expand Down
13 changes: 12 additions & 1 deletion Cotabby/Support/SuggestionTextNormalizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ enum SuggestionTextNormalizer {
/// Removes `<think>…</think>` reasoning blocks: complete blocks first, then any dangling open
/// tag left when generation hit the token limit before the block was closed.
private static func stripThinkBlocks(_ text: String) -> String {
// Both patterns below require a literal `<think>`, so this cheap scan lets the common case
// (no reasoning block — the vast majority of completions) skip regex work entirely.
// `String.range(of:options:.regularExpression)` compiles its pattern on every call, and
// this runs on the per-prediction critical path.
guard text.contains("<think>") else {
return text
}
var result = text
if let complete = result.range(of: "<think>[\\s\\S]*?</think>", options: .regularExpression) {
result.replaceSubrange(complete, with: "")
Expand Down Expand Up @@ -269,12 +276,16 @@ enum SuggestionTextNormalizer {
"App:"
]

/// Longest-first copy of `scaffoldingLabels`, built a single time on first access.
///
/// Longest-first ordering is what lets "Text before the caret:" win over a shorter sibling
/// label. Precomputing it here keeps the sort off the per-prediction critical path, where
/// repeating it on every call would yield the same array each time.
private static let labelsByLengthDescending: [String] = scaffoldingLabels.sorted { lhs, rhs in
    lhs.count > rhs.count
}

/// Removes a leading run of known prompt-scaffolding labels (see `scaffoldingLabels`), whether
/// each sits on its own line or inline before the continuation. Only labels at the very start
/// are stripped; a label appearing later in the text is left alone because by then it is far
/// more likely to be real user content than echoed scaffolding.
private static func stripLeadingScaffoldingLabels(_ text: String) -> String {
let labelsByLengthDescending = scaffoldingLabels.sorted { $0.count > $1.count }
var working = text

while true {
Expand Down