johnbean393 · shreeraman96 · Jun 8, 2026 · Jun 10, 2026
diff --git a/KeyType/Logic/Completion/CompletionController.swift b/KeyType/Logic/Completion/CompletionController.swift
@@ -17,6 +17,7 @@ import Foundation
 import LlamaModelRuntime
 import MacContextCapture
 import ModelManagement
+import ModelProfileGeneration
 import ModelRuntime
 import Observation
 import Personalization
@@ -562,10 +563,18 @@ final class CompletionController {
         // optional side sections are frozen briefly so unrelated history/clipboard/OCR updates do
         // not rewrite the prompt prefix and destroy KV append reuse mid-burst.
         let (sideContext, sideContextReused) = promptSideContext(for: promptContext)
+        // Relevance-filter the frozen history against the *live* beforeCursor so topically-unrelated
+        // samples (e.g. a bio stored from an earlier session in the same app) are dropped before they
+        // reach the prompt. This runs at generation time with the current context, not inside the
+        // 2-second frozen side-context cache, so the judgment always reflects what the user is typing.
+        let filteredHistory = WritingHistoryFilter.filterByRelevance(
+            sideContext.previousUserInputs,
+            beforeCursor: context.beforeCursor
+        )
         let promptResult = KeyTypeModuleGraph.makePromptBuilder().buildPrompt(
             context: promptContext,
             customInstructions: settings.promptCustomInstructions(appInstructions: policy.customInstructions),
-            previousUserInputs: sideContext.previousUserInputs,
+            previousUserInputs: filteredHistory,
             pasteboardText: sideContext.pasteboardText,
             screenText: sideContext.screenText,
             includeEnvironmentContext: policy.includesEnvironmentContext
@@ -577,20 +586,29 @@ final class CompletionController {
         let healExtraTokens = healSlack > 0 ? 1 : 0
         // Completion length is user-configurable (Settings) and maps to the decoder's token/width budget.
         let length = settings.completionLength
+        // Clipboard and OCR are background context, not text to reproduce; carry them so the output
+        // filter can drop a completion that just parrots them verbatim. History is excluded — it is
+        // already same-app/domain scoped and echoing the user's own recurring phrases is intended.
+        let injectedContext = Self.injectedContext(
+            pasteboardText: sideContext.pasteboardText,
+            screenText: sideContext.screenText
+        )
         let request = CompletionRequest(
             context: context,
             prompt: promptResult.prompt,
             requiredPrefixBytes: requiredPrefixBytes,
             mode: policy.completionMode,
             maxCompletionTokens: length.maxCompletionTokens + healExtraTokens,
-            maxDisplayWidth: length.maxDisplayWidth + healSlack
+            maxDisplayWidth: length.maxDisplayWidth + healSlack,
+            injectedContext: injectedContext
         )
         rememberFullPromptDebug(
             for: request,
             promptResult: promptResult,
             promptContext: promptContext,
             tokenHealing: heal.map { FullPromptTokenHealing(head: $0.head, heal: $0.heal) },
             sideContext: sideContext,
+            filteredPreviousUserInputs: filteredHistory,
             sideContextReused: sideContextReused,
             policy: policy,
             completionLength: length,
@@ -759,6 +777,7 @@ final class CompletionController {
         promptContext: TextFieldContext,
         tokenHealing: FullPromptTokenHealing?,
         sideContext: FrozenPromptSideContext,
+        filteredPreviousUserInputs: [String],
         sideContextReused: Bool,
         policy: CompletionPolicy,
         completionLength: CompletionLength,
@@ -776,7 +795,7 @@ final class CompletionController {
                 historyEnabled: sideContext.historyEnabled,
                 clipboardEnabled: sideContext.clipboardEnabled,
                 ocrEnabled: sideContext.ocrEnabled,
-                previousUserInputs: sideContext.previousUserInputs,
+                previousUserInputs: filteredPreviousUserInputs,
                 pasteboardText: sideContext.pasteboardText,
                 screenText: sideContext.screenText
             ),
@@ -886,11 +905,19 @@ final class CompletionController {
             return (cached, true)
         }
 
+        // Scope history to the focused app. Cross-app recent samples bleed unrelated content into the
+        // prompt — e.g. a Notes draft about an API key surfacing as a verbatim suggestion in a fresh
+        // Gmail message — which the small model tends to parrot. Same-app history still personalizes
+        // tone/recurring phrases without leaking content across contexts.
+        // Normalize an empty domain to nil so it can't collapse the same-app filter to `domain == ""`
+        // and silently drop all real history for the app.
+        let scopedDomain = context.target.domain.flatMap { $0.isEmpty ? nil : $0 }
         let query = WritingHistoryQuery(
             bundleIdentifier: context.target.bundleIdentifier,
-            domain: context.target.domain,
+            domain: scopedDomain,
             typingContext: context.typingContext,
-            language: context.detectedLanguage
+            language: context.detectedLanguage,
+            sameAppOnly: true
         )
         let previousUserInputs = settings.historyEnabled
             ? history.samples(for: query)
@@ -970,6 +997,59 @@ final class CompletionController {
         case notApplicable
     }
 
+    /// Collects the non-nil clipboard and OCR strings, clipboard first, in the `[String]` shape the
+    /// echo guard takes. Writing history is not a parameter here, so it never enters the result.
+    private static func injectedContext(pasteboardText: String?, screenText: String?) -> [String] {
+        (pasteboardText.map { [$0] } ?? []) + (screenText.map { [$0] } ?? [])
+    }
+
+    /// Decides whether a cached completion's leftover text may be re-shown, by re-running the
+    /// context-dependent suppression checks against the *live* context. Reuse bypasses the
+    /// generation pipeline, so the one-time filtering done at generation time can be out of date:
+    ///   - prefix-repetition / suffix-overlap read `beforeCursor`/`afterCursor`, which grow as the
+    ///     user types through the suggestion, so a tail that was clean at anchor time may now
+    ///     repeat (or duplicate as a suffix) text that was just typed;
+    ///   - the echo guard reads injected clipboard/OCR text, which can change mid-burst or differ
+    ///     from what an older reused snapshot saw. It is checked against `frozenSideContext`
+    ///     (already cached, so no hot-path pasteboard read); when no side context is frozen the
+    ///     injected list is empty.
+    /// Returns `true` when `remaining` is still safe to show.
+    private func reuseRemainingPassesLiveGuards(remaining: String, context: TextFieldContext) -> Bool {
+        let frozen = frozenSideContext
+        let liveInjectedContext = Self.injectedContext(
+            pasteboardText: frozen?.pasteboardText,
+            screenText: frozen?.screenText
+        )
+        return Self.reuseRemainingIsSafe(
+            remaining: remaining, context: context, injectedContext: liveInjectedContext
+        )
+    }
+
+    /// Static decision logic behind `reuseRemainingPassesLiveGuards`, split out so the reuse-safety
+    /// rules can be unit-tested without building a controller. Yields `true` when `remaining` may
+    /// still be re-shown given the live context and the injected side context.
+    nonisolated static func reuseRemainingIsSafe(
+        remaining: String,
+        context: TextFieldContext,
+        injectedContext: [String]
+    ) -> Bool {
+        // An empty remainder is always considered safe.
+        if remaining.isEmpty { return true }
+        // Guards run in order (prefix repetition, suffix overlap, context echo); the first hit rejects.
+        let repeatsTypedText = PrefixRepetitionGuard.repeatsPrefix(
+            completion: remaining,
+            beforeCursor: context.beforeCursor
+        )
+        guard !repeatsTypedText else { return false }
+        let duplicatesTrailingText = SuffixOverlapGuard.duplicatesSuffix(
+            completion: remaining,
+            beforeCursor: context.beforeCursor,
+            afterCursor: context.afterCursor
+        )
+        guard !duplicatesTrailingText else { return false }
+        return !ContextEchoGuard.echoesInjectedContext(completion: remaining, injectedContext: injectedContext)
+    }
+
     @discardableResult
     private func applyReuseHistoryIfUseful(
         for live: TextFieldContext,
@@ -980,6 +1060,11 @@ final class CompletionController {
 
         switch reuseHistory.decision(for: live) {
         case let .reuse(reuse):
+            guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: live) else {
+                predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
+                clearCompletion()
+                return .mustRecompute
+            }
             anchorText = reuse.anchorText
             anchorContext = reuse.anchorContext
             if updateLatestContext { latestContext = live }
@@ -1292,6 +1377,10 @@ final class CompletionController {
     ) -> Bool {
         switch decision {
         case let .reuse(reuse):
+            guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: optimistic) else {
+                predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"")
+                return false
+            }
             anchorText = reuse.anchorText
             anchorContext = reuse.anchorContext
             latestContext = optimistic
@@ -1478,12 +1567,28 @@ final class CompletionController {
             forFilename: modelFilename,
             vocabSize: runtime.metadata.vocabularySize
         )
-        let profile = try MmapAutocompleteProfile.open(
-            at: try ModelContainer.profileURL(family: family),
-            tokenizerVocabSize: runtime.metadata.vocabularySize,
-            tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
-            expectedModelFamily: family
-        )
+        let profileURL = try ModelContainer.profileURL(family: family)
+        func openProfile() throws -> MmapAutocompleteProfile {
+            try MmapAutocompleteProfile.open(
+                at: profileURL,
+                tokenizerVocabSize: runtime.metadata.vocabularySize,
+                tokenizerBytes: { try runtime.tokenizer.rawBytes(for: $0) },
+                expectedModelFamily: family
+            )
+        }
+        let profile: MmapAutocompleteProfile
+        do {
+            profile = try openProfile()
+        } catch {
+            // A profile built by an older classifier / schema version fails to open. No other launch
+            // path rebuilds it (setup only checks the file *exists*), so an app update that changes the
+            // token classification would otherwise brick completions for existing users. Rebuild it in
+            // place from the model's tokenizer, then retry. See ADR-021 / ACPF currentSchemaVersion.
+            Logger(subsystem: "com.pattonium.KeyType", category: "completion")
+                .error("ACPF profile open failed (\(String(describing: error), privacy: .public)); rebuilding for \(modelFilename, privacy: .public)")
+            _ = try await ProfileGenerator.generateProfileIfNeeded(forModelFilename: modelFilename)
+            profile = try openProfile()
+        }
         // Apply the telemetry-derived nudges to the decoder defaults: a larger relative cutoff keeps
         // more branches alive (fewer suppressions), a lower probability floor admits weaker-but-valid
         // continuations. Bounds are clamped inside `ThresholdTuner`. See ADR-023.

diff --git a/KeyType/Logic/Context/ScreenContextController.swift b/KeyType/Logic/Context/ScreenContextController.swift
@@ -99,6 +99,11 @@ final class ScreenContextController {
         let key = windowKey(for: snapshot)
         guard key != lastWindowKey else { return }
         lastWindowKey = key
+        // Drop the previous window's cached OCR *before* kicking off the new (async) capture, so a
+        // completion fired in the just-focused window can't be fed the prior window's screen text
+        // while the fresh capture is still in flight. Without this, switching browser tabs/windows
+        // leaks the old page's text (e.g. a "2 of 10 …" results counter) into the new one's prompt.
+        engine.clear()
         capture(for: snapshot)
     }
 
@@ -120,7 +125,18 @@ final class ScreenContextController {
         // screen context carries only the *surrounding* on-screen text.
         let context = snapshot.context
         let fieldText = context.beforeCursor + context.afterCursor
-        engine.refresh(pid: pid, fieldText: fieldText)
+        // The caret location lets the capturer pick the right window when the app has several open,
+        // so screen context can't bleed in text from a different window of the same app. `caretRect`
+        // is in AppKit space (bottom-left origin) but ScreenCaptureKit window frames are in CG space
+        // (top-left origin), so convert before handing it down — otherwise the Y axes don't match and
+        // the wrong window (or none) is selected.
+        let focusPoint = snapshot.caretRect.flatMap { rect -> CGPoint? in
+            DisplayCoordinateConverter.coreGraphicsPoint(
+                fromAppKitPoint: CGPoint(x: rect.midX, y: rect.midY),
+                displays: ScreenDisplayGeometryProvider.current()
+            )
+        }
+        engine.refresh(pid: pid, fieldText: fieldText, focusPoint: focusPoint)
     }
 
     // MARK: - Eligibility

diff --git a/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift b/KeyType/Logic/Telemetry/WritingHistoryRecorder.swift
@@ -122,6 +122,10 @@ final class WritingHistoryRecorder {
         guard sample.text.trimmingCharacters(
             in: .whitespacesAndNewlines
         ).count >= minimumCharacters else { return }
+        // Belt-and-suspenders junk gate: skip entries that aren't prose (bare URLs, UUID blobs,
+        // filesystem paths) before they reach the encrypted DB. Mirrored in WritingHistorySelection
+        // for samples already on disk from before this guard was introduced.
+        guard WritingHistoryFilter.isProse(sample.text) else { return }
 
         // Re-resolve the policy from the captured metadata: secure/sensitive fields and apps that
         // disable training-data collection must never contribute samples.

diff --git a/KeyTypeTests/KeyTypeTests.swift b/KeyTypeTests/KeyTypeTests.swift
@@ -273,6 +273,48 @@ struct KeyTypeTests {
         #expect(advanced == nil)
     }
 
+    // MARK: - Reuse re-check (H2)
+
+    @Test func reuseRejectsRemainingThatRepeatsRecentlyTypedText() {
+        // beforeCursor grows while the user types through a cached suggestion, so a tail that was
+        // clean at anchor time can turn into a verbatim repeat of just-typed text; reuse must
+        // refuse to re-show it.
+        let typedText = "You can use it to access the OpenAI. And"
+        let cachedTail = " you can use it to access the OpenAI again"
+        let liveContext = TextFieldContext(beforeCursor: typedText, target: Self.target)
+
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: cachedTail,
+            context: liveContext,
+            injectedContext: []
+        )
+        #expect(!isSafe)
+    }
+
+    @Test func reuseRejectsRemainingThatEchoesInjectedClipboard() {
+        // Reuse must refuse a cached tail that reproduces the clipboard/OCR text injected right now,
+        // even though the tail was acceptable when it was first generated.
+        let clipboard = "if you require maintenance of UPS systems or backup power, call us."
+        let liveContext = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target)
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: " if you require maintenance of UPS systems or",
+            context: liveContext,
+            injectedContext: [clipboard]
+        )
+        #expect(!isSafe)
+    }
+
+    @Test func reuseAllowsGenuineRemaining() {
+        // A tail unrelated to both the typed text and the injected clipboard text stays reusable.
+        let liveContext = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target)
+        let isSafe = CompletionController.reuseRemainingIsSafe(
+            remaining: " hope you are doing well today",
+            context: liveContext,
+            injectedContext: ["if you require maintenance of UPS systems or backup power, call us."]
+        )
+        #expect(isSafe)
+    }
+
     @Test func promotionCachePromotesLowerRankedBranchWhenTopIsInvalidated() {
         let cache = Self.promotionCache(candidates: [
             "ship it today",

diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift
@@ -0,0 +1,16 @@
+import Foundation
+
+/// Common normalization used by the content-overlap guards (`SuffixOverlapGuard`,
+/// `PrefixRepetitionGuard`, `ContextEchoGuard`). Text is lowercased and reduced to its alphanumeric
+/// scalars before comparison, so whitespace, punctuation, and stray symbol glyphs the model
+/// sometimes prepends ("**", "•") cannot defeat a match.
+enum AlphanumericNormalizer {
+    /// Returns `text` lowercased, keeping only the scalars in `CharacterSet.alphanumerics`.
+    static func normalize(_ text: String) -> String {
+        let alphanumerics = CharacterSet.alphanumerics
+        let kept: String.UnicodeScalarView = text.lowercased().unicodeScalars.filter { scalar in
+            alphanumerics.contains(scalar)
+        }
+        return String(kept)
+    }
+}
diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift
@@ -135,21 +135,29 @@ public struct CompletionRequest: Equatable {
     public var mode: CompletionMode
     public var maxCompletionTokens: Int
     public var maxDisplayWidth: Int
+    /// Side-context text injected into the prompt that the user did NOT type — clipboard contents and
+    /// on-screen OCR text. Carried alongside the request so the output filter can drop a completion
+    /// that merely parrots it verbatim (`ContextEchoGuard`). Writing-history samples are deliberately
+    /// excluded: they are scoped to the same app/domain and reproducing the user's own recurring
+    /// phrases is the point of that feature.
+    public var injectedContext: [String]
 
     public init(
         context: TextFieldContext,
         prompt: String,
         requiredPrefixBytes: [UInt8] = [],
         mode: CompletionMode = .prose,
         maxCompletionTokens: Int = 4,
-        maxDisplayWidth: Int = 80
+        maxDisplayWidth: Int = 80,
+        injectedContext: [String] = []  // clipboard/OCR side context; empty when the caller omits it
     ) {
         self.context = context
         self.prompt = prompt
         self.requiredPrefixBytes = requiredPrefixBytes
         self.mode = mode
         self.maxCompletionTokens = maxCompletionTokens
         self.maxDisplayWidth = maxDisplayWidth
         self.injectedContext = injectedContext
     }
 }
 
@@ -202,6 +210,26 @@ public enum SuppressionReason: Equatable {
     /// A mid-line / fill-in-the-middle completion that is too long or too low-probability to show
     /// without risking a wrong suggestion.
     case lowConfidenceMidLine
+    /// The completion reproduces a phrase that is already present in the recent text before the caret.
+    /// Accepting it would create a verbatim repetition loop. See `PrefixRepetitionGuard`.
+    case repeatsRecentPrefix
+    /// The completion verbatim-reproduces a span of injected side context the user did not type
+    /// (clipboard, on-screen OCR text) — the small model parroting context instead of predicting.
+    /// See `ContextEchoGuard`.
+    case echoesInjectedContext
+    /// The completion contains a reserved model-internal marker (e.g. Gemma's `<unused56>`, chat/FIM
+    /// scaffolding) that should have been masked at sample time. Belt-and-suspenders for stale or
+    /// mis-flagged token profiles. See `TokenClassifier` / `DefaultCandidateFilter.containsReservedMarker`.
+    case reservedMarker
+    /// The completion contains a within-candidate token-repetition loop — the same word appears ≥ 3 times
+    /// ("text 1 1 1", "since 1 1 1"). Model degeneration, not a bleed from side context.
+    /// See `IntraCompletionRepetitionGuard`.
+    case intraCompletionRepetition
+    /// The completion is nothing but markup tags (`</code>`, `<b>`, …) in a prose/correction context
+    /// whose surrounding text contains no markup — Gemma's single-token HTML-tag block surfacing in
+    /// ordinary writing. Sample-time demotion is the primary defence (see
+    /// `BiasPolicy.markupTagStaticPenalty`); this is its context-aware output net. See `MarkupTagGuard`.
+    case markupTagOutsideMarkupContext
     case noCandidate
 }