Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 116 additions & 11 deletions KeyType/Logic/Completion/CompletionController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Foundation
import LlamaModelRuntime
import MacContextCapture
import ModelManagement
import ModelProfileGeneration
import ModelRuntime
import Observation
import Personalization
Expand Down Expand Up @@ -562,10 +563,18 @@ final class CompletionController {
// optional side sections are frozen briefly so unrelated history/clipboard/OCR updates do
// not rewrite the prompt prefix and destroy KV append reuse mid-burst.
let (sideContext, sideContextReused) = promptSideContext(for: promptContext)
// Relevance-filter the frozen history against the *live* beforeCursor so topically-unrelated
// samples (e.g. a bio stored from an earlier session in the same app) are dropped before they
// reach the prompt. This runs at generation time with the current context, not inside the
// 2-second frozen side-context cache, so the judgment always reflects what the user is typing.
let filteredHistory = WritingHistoryFilter.filterByRelevance(
sideContext.previousUserInputs,
beforeCursor: context.beforeCursor
)
let promptResult = KeyTypeModuleGraph.makePromptBuilder().buildPrompt(
context: promptContext,
customInstructions: settings.promptCustomInstructions(appInstructions: policy.customInstructions),
previousUserInputs: sideContext.previousUserInputs,
previousUserInputs: filteredHistory,
pasteboardText: sideContext.pasteboardText,
screenText: sideContext.screenText,
includeEnvironmentContext: policy.includesEnvironmentContext
Expand All @@ -577,20 +586,29 @@ final class CompletionController {
let healExtraTokens = healSlack > 0 ? 1 : 0
// Completion length is user-configurable (Settings) and maps to the decoder's token/width budget.
let length = settings.completionLength
// Clipboard and OCR are background context, not text to reproduce; carry them so the output
// filter can drop a completion that just parrots them verbatim. History is excluded — it is
// already same-app/domain scoped and echoing the user's own recurring phrases is intended.
let injectedContext = Self.injectedContext(
pasteboardText: sideContext.pasteboardText,
screenText: sideContext.screenText
)
let request = CompletionRequest(
context: context,
prompt: promptResult.prompt,
requiredPrefixBytes: requiredPrefixBytes,
mode: policy.completionMode,
maxCompletionTokens: length.maxCompletionTokens + healExtraTokens,
maxDisplayWidth: length.maxDisplayWidth + healSlack
maxDisplayWidth: length.maxDisplayWidth + healSlack,
injectedContext: injectedContext
)
rememberFullPromptDebug(
for: request,
promptResult: promptResult,
promptContext: promptContext,
tokenHealing: heal.map { FullPromptTokenHealing(head: $0.head, heal: $0.heal) },
sideContext: sideContext,
filteredPreviousUserInputs: filteredHistory,
sideContextReused: sideContextReused,
policy: policy,
completionLength: length,
Expand Down Expand Up @@ -759,6 +777,7 @@ final class CompletionController {
promptContext: TextFieldContext,
tokenHealing: FullPromptTokenHealing?,
sideContext: FrozenPromptSideContext,
filteredPreviousUserInputs: [String],
sideContextReused: Bool,
policy: CompletionPolicy,
completionLength: CompletionLength,
Expand All @@ -776,7 +795,7 @@ final class CompletionController {
historyEnabled: sideContext.historyEnabled,
clipboardEnabled: sideContext.clipboardEnabled,
ocrEnabled: sideContext.ocrEnabled,
previousUserInputs: sideContext.previousUserInputs,
previousUserInputs: filteredPreviousUserInputs,
pasteboardText: sideContext.pasteboardText,
screenText: sideContext.screenText
),
Expand Down Expand Up @@ -886,11 +905,19 @@ final class CompletionController {
return (cached, true)
}

// Scope history to the focused app. Cross-app recent samples bleed unrelated content into the
// prompt — e.g. a Notes draft about an API key surfacing as a verbatim suggestion in a fresh
// Gmail message — which the small model tends to parrot. Same-app history still personalizes
// tone/recurring phrases without leaking content across contexts.
// Normalize an empty domain to nil so it can't collapse the same-app filter to `domain == ""`
// and silently drop all real history for the app.
let scopedDomain = context.target.domain.flatMap { $0.isEmpty ? nil : $0 }
let query = WritingHistoryQuery(
bundleIdentifier: context.target.bundleIdentifier,
domain: context.target.domain,
domain: scopedDomain,
typingContext: context.typingContext,
language: context.detectedLanguage
language: context.detectedLanguage,
sameAppOnly: true
)
let previousUserInputs = settings.historyEnabled
? history.samples(for: query)
Expand Down Expand Up @@ -970,6 +997,59 @@ final class CompletionController {
case notApplicable
}

/// Clipboard + OCR text injected into the prompt, as the echo guard consumes it. History is
/// intentionally excluded (same-app/domain scoped; echoing the user's own phrases is intended).
private static func injectedContext(pasteboardText: String?, screenText: String?) -> [String] {
    // Clipboard first, then OCR. A source that is nil is left out; a present-but-empty string is kept.
    var sources: [String] = []
    if let pasteboardText {
        sources.append(pasteboardText)
    }
    if let screenText {
        sources.append(screenText)
    }
    return sources
}

/// Re-check the context-dependent suppression nets against the *live* context before re-showing a
/// cached completion. The candidate was filtered once at generation time, but reuse re-shows it
/// without going back through the pipeline, and the inputs those nets key off can change after the
/// fact:
/// - prefix-repetition / suffix-overlap key off `beforeCursor`/`afterCursor`, which grow as the
/// user types through the suggestion — a tail clean at anchor time can become a verbatim
/// repetition (or suffix duplication) of text just typed;
/// - the echo guard keys off injected clipboard/OCR context, which can change mid-burst or differ
/// from when an older reused snapshot was generated. We check it against the currently-frozen
/// side context (already cached, so no hot-path pasteboard read).
/// Returns `true` when the remaining text is still safe to show.
private func reuseRemainingPassesLiveGuards(remaining: String, context: TextFieldContext) -> Bool {
    // Take clipboard/OCR text from the currently-frozen side context. When nothing is frozen both
    // values are nil, so the injected-context list handed to the echo check is empty.
    let frozen = frozenSideContext
    let liveInjectedContext = Self.injectedContext(
        pasteboardText: frozen?.pasteboardText,
        screenText: frozen?.screenText
    )
    return Self.reuseRemainingIsSafe(
        remaining: remaining,
        context: context,
        injectedContext: liveInjectedContext
    )
}

/// Pure decision behind `reuseRemainingPassesLiveGuards`, factored out so the reuse-safety rules
/// are unit-testable without constructing a controller. `true` when `remaining` is still safe to
/// re-show against the given live context and injected side context.
nonisolated static func reuseRemainingIsSafe(
    remaining: String,
    context: TextFieldContext,
    injectedContext: [String]
) -> Bool {
    // An empty remainder has nothing to vet.
    if remaining.isEmpty { return true }

    // The guards run in this order and short-circuit: a later guard is only consulted once every
    // earlier one has passed. Any single hit makes the remainder unsafe to re-show.
    let isUnsafe =
        PrefixRepetitionGuard.repeatsPrefix(completion: remaining, beforeCursor: context.beforeCursor)
        || SuffixOverlapGuard.duplicatesSuffix(
            completion: remaining,
            beforeCursor: context.beforeCursor,
            afterCursor: context.afterCursor
        )
        || ContextEchoGuard.echoesInjectedContext(completion: remaining, injectedContext: injectedContext)
    return !isUnsafe
}

@discardableResult
private func applyReuseHistoryIfUseful(
for live: TextFieldContext,
Expand All @@ -980,6 +1060,11 @@ final class CompletionController {

switch reuseHistory.decision(for: live) {
case let .reuse(reuse):
guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: live) else {
predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
clearCompletion()
return .mustRecompute
}
anchorText = reuse.anchorText
anchorContext = reuse.anchorContext
if updateLatestContext { latestContext = live }
Expand Down Expand Up @@ -1292,6 +1377,10 @@ final class CompletionController {
) -> Bool {
switch decision {
case let .reuse(reuse):
guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: optimistic) else {
predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
return false
}
anchorText = reuse.anchorText
anchorContext = reuse.anchorContext
latestContext = optimistic
Expand Down Expand Up @@ -1478,12 +1567,28 @@ final class CompletionController {
forFilename: modelFilename,
vocabSize: runtime.metadata.vocabularySize
)
let profile = try MmapAutocompleteProfile.open(
at: try ModelContainer.profileURL(family: family),
tokenizerVocabSize: runtime.metadata.vocabularySize,
tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
expectedModelFamily: family
)
let profileURL = try ModelContainer.profileURL(family: family)
func openProfile() throws -> MmapAutocompleteProfile {
try MmapAutocompleteProfile.open(
at: profileURL,
tokenizerVocabSize: runtime.metadata.vocabularySize,
tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
expectedModelFamily: family
)
}
let profile: MmapAutocompleteProfile
do {
profile = try openProfile()
} catch {
// A profile built by an older classifier / schema version fails to open. No other launch
// path rebuilds it (setup only checks the file *exists*), so an app update that changes the
// token classification would otherwise brick completions for existing users. Rebuild it in
// place from the model's tokenizer, then retry. See ADR-021 / ACPF currentSchemaVersion.
Logger(subsystem: "com.pattonium.KeyType", category: "completion")
.error("ACPF profile open failed (\(String(describing: error), privacy: .public)); rebuilding for \(modelFilename, privacy: .public)")
_ = try await ProfileGenerator.generateProfileIfNeeded(forModelFilename: modelFilename)
profile = try openProfile()
}
// Apply the telemetry-derived nudges to the decoder defaults: a larger relative cutoff keeps
// more branches alive (fewer suppressions), a lower probability floor admits weaker-but-valid
// continuations. Bounds are clamped inside `ThresholdTuner`. See ADR-023.
Expand Down
18 changes: 17 additions & 1 deletion KeyType/Logic/Context/ScreenContextController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ final class ScreenContextController {
let key = windowKey(for: snapshot)
guard key != lastWindowKey else { return }
lastWindowKey = key
// Drop the previous window's cached OCR *before* kicking off the new (async) capture, so a
// completion fired in the just-focused window can't be fed the prior window's screen text
// while the fresh capture is still in flight. Without this, switching browser tabs/windows
// leaks the old page's text (e.g. a "2 of 10 …" results counter) into the new one's prompt.
engine.clear()
capture(for: snapshot)
}

Expand All @@ -120,7 +125,18 @@ final class ScreenContextController {
// screen context carries only the *surrounding* on-screen text.
let context = snapshot.context
let fieldText = context.beforeCursor + context.afterCursor
engine.refresh(pid: pid, fieldText: fieldText)
// The caret location lets the capturer pick the right window when the app has several open,
// so screen context can't bleed in text from a different window of the same app. `caretRect`
// is in AppKit space (bottom-left origin) but ScreenCaptureKit window frames are in CG space
// (top-left origin), so convert before handing it down — otherwise the Y axes don't match and
// the wrong window (or none) is selected.
let focusPoint = snapshot.caretRect.flatMap { rect -> CGPoint? in
DisplayCoordinateConverter.coreGraphicsPoint(
fromAppKitPoint: CGPoint(x: rect.midX, y: rect.midY),
displays: ScreenDisplayGeometryProvider.current()
)
}
engine.refresh(pid: pid, fieldText: fieldText, focusPoint: focusPoint)
}

// MARK: - Eligibility
Expand Down
4 changes: 4 additions & 0 deletions KeyType/Logic/Telemetry/WritingHistoryRecorder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ final class WritingHistoryRecorder {
guard sample.text.trimmingCharacters(
in: .whitespacesAndNewlines
).count >= minimumCharacters else { return }
// Belt-and-suspenders junk gate: skip entries that aren't prose (bare URLs, UUID blobs,
// filesystem paths) before they reach the encrypted DB. Mirrored in WritingHistorySelection
// for samples already on disk from before this guard was introduced.
guard WritingHistoryFilter.isProse(sample.text) else { return }

// Re-resolve the policy from the captured metadata: secure/sensitive fields and apps that
// disable training-data collection must never contribute samples.
Expand Down
42 changes: 42 additions & 0 deletions KeyTypeTests/KeyTypeTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,48 @@ struct KeyTypeTests {
#expect(advanced == nil)
}

// MARK: - Reuse re-check (H2)

@Test func reuseRejectsRemainingThatRepeatsRecentlyTypedText() {
    // While the user types through a cached suggestion, beforeCursor keeps growing. Once the
    // leftover tail merely repeats what was just typed, reuse must refuse to show it again.
    let typedSoFar = "You can use it to access the OpenAI. And"
    let staleTail = " you can use it to access the OpenAI again"

    let isSafe = CompletionController.reuseRemainingIsSafe(
        remaining: staleTail,
        context: TextFieldContext(beforeCursor: typedSoFar, target: Self.target),
        injectedContext: []
    )

    #expect(isSafe == false)
}

@Test func reuseRejectsRemainingThatEchoesInjectedClipboard() {
    // The cached completion was clean when it was anchored, but it now parrots the clipboard/OCR
    // text that is currently injected, so reuse must not re-show it.
    let injected = ["if you require maintenance of UPS systems or backup power, call us."]
    let parrotedTail = " if you require maintenance of UPS systems or"

    let isSafe = CompletionController.reuseRemainingIsSafe(
        remaining: parrotedTail,
        context: TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target),
        injectedContext: injected
    )

    #expect(isSafe == false)
}

@Test func reuseAllowsGenuineRemaining() {
    // A remainder unrelated to both the typed text and the injected clipboard/OCR text stays
    // eligible for reuse.
    let injected = ["if you require maintenance of UPS systems or backup power, call us."]
    let genuineTail = " hope you are doing well today"

    let isSafe = CompletionController.reuseRemainingIsSafe(
        remaining: genuineTail,
        context: TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target),
        injectedContext: injected
    )

    #expect(isSafe)
}

@Test func promotionCachePromotesLowerRankedBranchWhenTopIsInvalidated() {
let cache = Self.promotionCache(candidates: [
"ship it today",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import Foundation

/// Shared text normalization for the content-overlap guards (`SuffixOverlapGuard`,
/// `PrefixRepetitionGuard`, `ContextEchoGuard`). Comparisons are done on case-folded alphanumeric
/// scalars only, so differences in whitespace, punctuation, and stray symbol glyphs the model
/// sometimes prepends ("**", "•") don't defeat a match.
enum AlphanumericNormalizer {
    /// Lowercases `text`, then keeps only the Unicode scalars that belong to
    /// `CharacterSet.alphanumerics`, preserving their original order.
    static func normalize(_ text: String) -> String {
        let allowed = CharacterSet.alphanumerics
        let keptScalars = text.lowercased().unicodeScalars.filter { allowed.contains($0) }
        return String(String.UnicodeScalarView(keptScalars))
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -135,21 +135,29 @@ public struct CompletionRequest: Equatable {
public var mode: CompletionMode
public var maxCompletionTokens: Int
public var maxDisplayWidth: Int
/// Side-context text injected into the prompt that the user did NOT type — clipboard contents and
/// on-screen OCR text. Carried alongside the request so the output filter can drop a completion
/// that merely parrots it verbatim (`ContextEchoGuard`). Writing-history samples are deliberately
/// excluded: they are scoped to the same app/domain and reproducing the user's own recurring
/// phrases is the point of that feature.
public var injectedContext: [String]

/// Creates a completion request.
///
/// Each argument is stored unchanged in the property of the same name.
///
/// - Parameters:
///   - context: The text-field context the completion is requested for.
///   - prompt: The prompt text for this request.
///   - requiredPrefixBytes: Defaults to no bytes.
///   - mode: Defaults to `.prose`.
///   - maxCompletionTokens: Defaults to 4.
///   - maxDisplayWidth: Defaults to 80.
///   - injectedContext: Side-context strings carried with the request. Defaults to empty, so
///     call sites that predate this parameter keep compiling.
public init(
    context: TextFieldContext,
    prompt: String,
    requiredPrefixBytes: [UInt8] = [],
    mode: CompletionMode = .prose,
    maxCompletionTokens: Int = 4,
    maxDisplayWidth: Int = 80,
    injectedContext: [String] = []
) {
    self.context = context
    self.prompt = prompt
    self.requiredPrefixBytes = requiredPrefixBytes
    self.mode = mode
    self.maxCompletionTokens = maxCompletionTokens
    self.maxDisplayWidth = maxDisplayWidth
    self.injectedContext = injectedContext
}
}

Expand Down Expand Up @@ -202,6 +210,26 @@ public enum SuppressionReason: Equatable {
/// A mid-line / fill-in-the-middle completion that is too long or too low-probability to show
/// without risking a wrong suggestion.
case lowConfidenceMidLine
/// The completion reproduces a phrase that is already present in the recent text before the caret.
/// Accepting it would create a verbatim repetition loop. See `PrefixRepetitionGuard`.
case repeatsRecentPrefix
/// The completion verbatim-reproduces a span of injected side context the user did not type
/// (clipboard, on-screen OCR text) — the small model parroting context instead of predicting.
/// See `ContextEchoGuard`.
case echoesInjectedContext
/// The completion contains a reserved model-internal marker (e.g. Gemma's `<unused56>`, chat/FIM
/// scaffolding) that should have been masked at sample time. Belt-and-suspenders for stale or
/// mis-flagged token profiles. See `TokenClassifier` / `DefaultCandidateFilter.containsReservedMarker`.
case reservedMarker
/// The completion contains a within-candidate token-repetition loop — the same word appears ≥ 3 times
/// ("text 1 1 1", "since 1 1 1"). Model degeneration, not a bleed from side context.
/// See `IntraCompletionRepetitionGuard`.
case intraCompletionRepetition
/// The completion is nothing but markup tags (`</code>`, `<b>`, …) in a prose/correction context
/// whose surrounding text contains no markup — Gemma's single-token HTML-tag block surfacing in
/// ordinary writing. Sample-time demotion is the primary defence (see
/// `BiasPolicy.markupTagStaticPenalty`); this is its context-aware output net. See `MarkupTagGuard`.
case markupTagOutsideMarkupContext
case noCandidate
}

Expand Down
Loading