samfoy · samfoy · Jun 10, 2026
@@ -1,6 +1,6 @@
 {
   "name": "@samfp/pi-memory",
-  "version": "1.3.3",
+  "version": "1.4.0",
   "description": "Persistent memory for pi — learns corrections, preferences, and patterns from sessions and injects them into future conversations.",
   "keywords": [
     "pi-package",
@@ -34,6 +34,9 @@
     "dev": "esbuild src/index.ts --bundle --platform=node --format=esm --outfile=dist/index.js --sourcemap --watch --packages=external --external:@mariozechner/pi-coding-agent --external:@earendil-works/pi-coding-agent --external:@sinclair/typebox",
     "prepare": "npm run build"
   },
+  "dependencies": {
+    "@xenova/transformers": "^2.17.0"
+  },
   "peerDependencies": {
     "@earendil-works/pi-coding-agent": "*",
     "@sinclair/typebox": "*"

@@ -0,0 +1,101 @@
+/**
+ * Lazy embedding pipeline using @xenova/transformers (optional dependency).
+ * Gracefully degrades to FTS-only when the package is unavailable or the
+ * model fails to load.
+ *
+ * Model: Xenova/all-MiniLM-L6-v2 (quantized int8, ~6 MB download, 384 dims).
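+ * Cached on disk after first download. NOTE(review): transformers.js stores
+ * models under `env.cacheDir` (by default a `.cache` directory inside the
+ * installed package), not ~/.cache/huggingface/hub/ — confirm the location.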
+ */
+
+const MODEL = "Xenova/all-MiniLM-L6-v2"; // model id passed to pipeline("feature-extraction", …)
+const LOAD_TIMEOUT_MS = 30_000; // cap on the model-load promise in getPipe()
+const INFER_TIMEOUT_MS = 5_000; // cap on one inference call in embed()
+const TEXT_CHAR_LIMIT = 512; // embed() slices its input to this many UTF-16 code units
+
+/** Loaded feature-extraction pipeline, or null until a load has succeeded. */
+let _pipe: unknown = null;
+/** Sticky failure flag: once set, getPipe() returns null without retrying. */
+let _failed = false;
+/** In-flight load shared by concurrent callers; null when no load is running. */
+let _loading: Promise<unknown> | null = null;
+
+/**
+ * Return the lazily created embedding pipeline, or null when it is unavailable.
+ *
+ * The first caller starts the load. Callers that arrive while it is still
+ * pending await the same promise instead of each importing the package and
+ * constructing a pipeline of their own. A failed load (package missing, model
+ * error, or exceeding LOAD_TIMEOUT_MS) sets `_failed`, so every later call
+ * returns null immediately.
+ */
+async function getPipe(): Promise<unknown> {
+  if (_failed) return null;
+  if (_pipe) return _pipe;
+  if (!_loading) {
+    // loadPipe() never rejects, so finally() only has to free the slot.
+    _loading = loadPipe().finally(() => {
+      _loading = null;
+    });
+  }
+  return _loading;
+}
+
+/** Import the package and build the pipeline once; resolves to null on any failure. */
+async function loadPipe(): Promise<unknown> {
+  try {
+    // Dynamic import — @xenova/transformers is optional; catch if absent.
+    // String variable prevents TypeScript from resolving the type (it's optional).
+    const pkg = "@xenova/transformers";
+    const mod = await import(pkg).catch(() => null) as any;
+    if (!mod) {
+      console.error("pi-memory: @xenova/transformers not installed, semantic search disabled");
+      _failed = true;
+      return null;
+    }
+    const { pipeline, env } = mod;
+    env.allowRemoteModels = true;
+    env.useBrowserCache = false;
+    _pipe = await withTimeout(
+      pipeline("feature-extraction", MODEL, { quantized: true }),
+      LOAD_TIMEOUT_MS,
+      "model load",
+    );
+    return _pipe;
+  } catch (err: unknown) {
+    console.error(`pi-memory: embedder unavailable (${(err as any)?.message ?? err}), using FTS-only`);
+    _failed = true;
+    return null;
+  }
+}
+
+/**
+ * Embed `text` as a mean-pooled, normalized vector.
+ *
+ * @param text Input string; only its first TEXT_CHAR_LIMIT characters reach the model.
+ * @returns The vector as a Float32Array, or null when the pipeline is
+ *   unavailable, inference throws, or inference exceeds INFER_TIMEOUT_MS.
+ */
+export async function embed(text: string): Promise<Float32Array | null> {
+  const extractor = await getPipe();
+  if (!extractor) {
+    return null;
+  }
+  try {
+    const clipped = text.slice(0, TEXT_CHAR_LIMIT);
+    const pending = (extractor as any)(clipped, { pooling: "mean", normalize: true });
+    const result = await withTimeout(pending, INFER_TIMEOUT_MS, "inference");
+    return new Float32Array((result as any).data);
+  } catch {
+    // Any failure, timeout included, is reported as null rather than thrown.
+    return null;
+  }
+}
+
+/**
+ * Dot product of two vectors; this equals their cosine similarity when both
+ * are unit length, i.e. were produced with normalize:true.
+ * When the lengths differ, only the overlapping prefix contributes.
+ */
+export function similarity(a: Float32Array, b: Float32Array): number {
+  const n = a.length < b.length ? a.length : b.length;
+  let acc = 0;
+  let i = 0;
+  while (i < n) {
+    acc += a[i] * b[i];
+    i += 1;
+  }
+  return acc;
+}
+
+/**
+ * Pack a Float32Array into a Buffer suitable for a SQLite BLOB column.
+ * The bytes are copied, so later writes to `v` do not show up in the result.
+ */
+export function toBlob(v: Float32Array): Buffer {
+  // View exactly the bytes `v` covers (it may be a window into a larger buffer).
+  const bytes = new Uint8Array(v.buffer, v.byteOffset, v.byteLength);
+  const copy = Buffer.from(bytes);
+  return copy;
+}
+
+/**
+ * Deserialize a SQLite BLOB back to Float32Array.
+ *
+ * @param b Raw BLOB bytes, or null/undefined when the column is empty.
+ * @returns A Float32Array over a freshly copied buffer, or null when the input
+ *   is null/undefined or its byte length is not a whole number of 4-byte
+ *   floats (a truncated or foreign BLOB). Without that check the
+ *   Float32Array constructor would throw a RangeError.
+ */
+export function fromBlob(b: Buffer | null | undefined): Float32Array | null {
+  if (!b) return null;
+  if (b.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) return null;
+  // Copy into an owned, offset-0 ArrayBuffer: the source's .buffer may be a
+  // shared backing store with a non-zero byteOffset.
+  const raw = Uint8Array.from(b);
+  return new Float32Array(raw.buffer);
+}
+
+/**
+ * Settle with `p`, or reject with `Error("<label> timeout after <ms>ms")` if
+ * `p` has not settled within `ms` milliseconds.
+ *
+ * The timer is cleared as soon as the race settles, so a fast `p` does not
+ * leave a pending timer keeping the event loop alive for the rest of `ms`.
+ * `p` itself is not cancelled on timeout; its eventual result is ignored.
+ */
+function withTimeout<T>(p: Promise<T>, ms: number, label: string): Promise<T> {
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => reject(new Error(`${label} timeout after ${ms}ms`)), ms);
+  });
+  return Promise.race([p, timeout]).finally(() => clearTimeout(timer));
+}
@@ -24,6 +24,14 @@ import { homedir } from "node:os";
 import { readFileSync } from "node:fs";
 import { MemoryStore } from "./store.js";
 import { buildContextBlock, projectSlug, type InjectorConfig } from "./injector.js";
+import { embed } from "./embedder.js";
+
+// Re-export internals so consumers (e.g. pi-dashboard's system-prompt route)
+// can build their own context blocks without reaching into ./dist/store.js.
+// The bundled `dist/index.js` inlines these, so prior `req('./dist/store.js')`
+// callers were always broken.
+export { MemoryStore } from "./store.js";
+export { buildContextBlock, projectSlug, type InjectorConfig } from "./injector.js";
 
 type ToolResult = AgentToolResult<unknown>;
 function ok(text: string): ToolResult { return { content: [{ type: "text", text }], details: {} }; }
@@ -249,11 +257,8 @@ export default function (pi: ExtensionAPI) {
       }
 
       // Inject stored memory as a one-shot custom message BEFORE any user
-      // message arrives. Matches pi-knowledge-search's pattern.
-      //
-      // Skipped when `perTurnInjection: true` — in that mode the
-      // before_agent_start handler below takes over with per-turn semantic
-      // matching via systemPrompt mutation.
+      // message arrives. Only used when `perTurnInjection: false` is explicitly
+      // configured (session_start mode, opt-out from adaptive injection).
       //
       // Historical note: v1.0.x mutated event.systemPrompt in before_agent_start.
       // That broke provider prefix caches on every turn boundary (any drift in
@@ -267,9 +272,17 @@ export default function (pi: ExtensionAPI) {
       // lessons, 8KB cap). Correct ordering, stable cache, simpler model.
       //
       // v1.3.x adds `perTurnInjection: true` as an opt-in to restore v1.0.x
-      // per-turn selective behavior (mutates systemPrompt, breaks cache on
-      // every turn boundary — users opt in knowing the tradeoff).
-      if (!injectorConfig.perTurnInjection) {
+      // per-turn selective behavior.
+      //
+      // v1.4.0 flips the default: per-turn semantic injection via systemPrompt
+      // mutation in before_agent_start.
+      //
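+      // v1.5.0 introduces injectionMode: "context-hook" as the new default.
+      // Memory is injected as an ephemeral message via the context hook instead
+      // of mutating systemPrompt. System prompt is now permanently stable,
+      // guaranteeing cache hits on the system prompt prefix regardless of topic.
+      // NOTE(review): this change ships as 1.4.0 and the before_agent_start
+      // handler below still documents systemPrompt mutation as the default —
+      // confirm whether the context-hook path exists yet or this note is premature.
+      // The session_start fallback dump is opt-in via `perTurnInjection: false`.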
+      if (injectorConfig.perTurnInjection === false) {
         try {
           const alreadyInjected = ctx.sessionManager
             .getEntries()
@@ -278,7 +291,7 @@ export default function (pi: ExtensionAPI) {
                 e.type === "custom_message" && e.customType === "pi-memory-context",
             );
           if (!alreadyInjected) {
-            const { text, stats: injStats } = buildContextBlock(
+            const { text, stats: injStats } = await buildContextBlock(
               store,
               sessionCwd,
               undefined, // no prompt → fallback: dump all relevant memory
@@ -303,22 +316,26 @@ export default function (pi: ExtensionAPI) {
   });
 
   // ----------------------------------------------------------------
-  // Opt-in per-turn selective injection (v1.3.0).
+  // Per-turn semantic injection (v1.4.0 default).
+  //
+  // Runs on every user turn, injecting memories relevant to the current
+  // prompt into event.systemPrompt. This is now the DEFAULT behavior;
+  // session_start fallback mode requires `perTurnInjection: false`.
+  //
+  // Cache stability: when the same entries are relevant across consecutive
+  // turns (stable topic), the injected text is identical and the provider's
+  // prefix cache hits. Entries are sorted deterministically in the injector
+  // so identical sets always produce identical text.
   //
-  // When `perTurnInjection: true` is set, run a semantic search against the
-  // current user prompt and append matching memory to event.systemPrompt.
   // MUST use systemPrompt (not { message }) — returning { message } puts the
   // content AFTER the user message and causes the model to respond to the
   // injected memory instead of the user. See v1.1.x postmortem.
-  //
-  // This breaks provider prefix caches on every turn boundary — an accepted
-  // cost for users who want per-query relevance from large memory stores.
   // ----------------------------------------------------------------
   pi.on("before_agent_start", async (event, ctx) => {
     if (!store) return;
-    if (!injectorConfig.perTurnInjection) return;
+    if (injectorConfig.perTurnInjection === false) return;
 
-    const { text } = buildContextBlock(store, ctx.cwd, event.prompt, injectorConfig);
+    const { text } = await buildContextBlock(store, ctx.cwd, event.prompt, injectorConfig);
     if (!text) return;
 
     return {
@@ -515,6 +532,13 @@ export default function (pi: ExtensionAPI) {
           return ok("Both key and value required for facts");
         }
         store.setSemantic(params.key, params.value, 0.95, "user");
+        // Fire-and-forget: compute and store embedding for the new/updated entry
+        // so it's available for semantic search in future sessions.
+        const _key = params.key as string;
+        const _val = params.value as string;
+        embed(`${_key.split(".").slice(1).join(" ")} ${_val}`)
+          .then(vec => { if (vec) store!.setEmbedding(_key, vec); })
+          .catch(() => {});
         return ok(`Remembered: ${params.key} = ${params.value}`);
       }
 

@@ -20,133 +20,133 @@ describe("buildContextBlock", () => {
     rmSync(tmpDir, { recursive: true, force: true });
   });
 
-  it("returns empty for empty store", () => {
-    const { text, stats } = buildContextBlock(store);
+  it("returns empty for empty store", async () => {
+    const { text, stats } = await buildContextBlock(store);
     assert.equal(text, "");
     assert.equal(stats.semantic, 0);
     assert.equal(stats.lessons, 0);
   });
 
-  it("includes preferences in fallback mode (no prompt)", () => {
+  it("includes preferences in fallback mode (no prompt)", async () => {
     store.setSemantic("pref.editor", "vim", 0.9, "user");
-    const { text, stats } = buildContextBlock(store);
+    const { text, stats } = await buildContextBlock(store);
     assert.ok(text.includes("User Preferences"));
     assert.ok(text.includes("editor: vim"));
     assert.ok(stats.semantic > 0);
   });
 
-  it("includes lessons with DON'T prefix for negative", () => {
+  it("includes lessons with DON'T prefix for negative", async () => {
     store.addLesson("Use sed for daily notes", "vault", "user", true);
-    const { text } = buildContextBlock(store);
+    const { text } = await buildContextBlock(store);
     assert.ok(text.includes("Learned Corrections"));
     assert.ok(text.includes("DON'T:"));
   });
 
-  it("wraps in <memory> tags", () => {
-    const { text } = buildContextBlock(store);
+  it("wraps in <memory> tags", async () => {
+    const { text } = await buildContextBlock(store);
     assert.ok(text.startsWith("<memory>"));
     assert.ok(text.endsWith("</memory>"));
   });
 
-  it("scopes project context to cwd in fallback mode", () => {
+  it("scopes project context to cwd in fallback mode", async () => {
     store.setSemantic("project.rosie.lang", "java", 0.9, "consolidation");
     store.setSemantic("project.other.lang", "python", 0.5, "consolidation");
 
-    const { text } = buildContextBlock(store, "/workplace/samfp/Rosie");
+    const { text } = await buildContextBlock(store, "/workplace/samfp/Rosie");
     assert.ok(text.includes("rosie.lang"));
     assert.ok(!text.includes("other.lang"));
   });
 
-  it("fallback: excludes other-project facts even when user-set (confidence 0.95)", () => {
+  it("fallback: excludes other-project facts even when user-set (confidence 0.95)", async () => {
     // User-set facts have confidence 0.95 — the old code included ALL such
     // facts via `|| p.confidence >= 0.9`, bleeding unrelated project context.
     store.setSemantic("project.rise.hosting", "GitLab — use glab CLI", 0.95, "user");
     store.setSemantic("project.ttrpg.npc", "Read Mechanics/Goons.md before generating combat stats", 0.95, "user");
     store.setSemantic("project.myapp.lang", "typescript", 0.95, "user");
 
-    const { text } = buildContextBlock(store, "/home/user/projects/myapp");
+    const { text } = await buildContextBlock(store, "/home/user/projects/myapp");
     assert.ok(text.includes("myapp.lang"), "should include current project fact");
     assert.ok(!text.includes("rise.hosting"), "should NOT include rise facts in myapp session");
     assert.ok(!text.includes("ttrpg.npc"), "should NOT include ttrpg facts in myapp session");
   });
 
-  it("fallback: exact slug match — short slug does not match longer key segment", () => {
+  it("fallback: exact slug match — short slug does not match longer key segment", async () => {
     // Regression: old substring check `key.includes('pi')` matched 'project.pipefittingjobs.*'
     store.setSemantic("project.pipefittingjobs.source", "adzuna + jooble", 0.9, "user");
     store.setSemantic("project.pi-memory.store", "sqlite via node:sqlite", 0.9, "user");
 
     // In a session with cwd slug 'pi' (project named just 'pi'),
     // pipefittingjobs should NOT appear.
     // We simulate a cwd whose slug resolves to 'pi' exactly.
-    const { text } = buildContextBlock(store, "/home/user/projects/pi");
+    const { text } = await buildContextBlock(store, "/home/user/projects/pi");
     assert.ok(!text.includes("pipefittingjobs.source"), "slug 'pi' should not match 'pipefittingjobs'");
     // pi-memory also shouldn't match 'pi' slug (different slug: 'pi-memory')
     assert.ok(!text.includes("pi-memory.store"), "slug 'pi' should not match 'pi-memory'");
   });
 
   // ─── Selective injection tests ───────────────────────────────────
 
-  it("selective: searches by prompt and returns relevant entries", () => {
+  it("selective: searches by prompt and returns relevant entries", async () => {
     store.setSemantic("pref.commit_style", "conventional commits", 0.9, "user");
     store.setSemantic("project.rosie.di", "Dagger dependency injection", 0.95, "consolidation");
     store.setSemantic("tool.sed", "use for daily note insertion", 0.9, "consolidation");
 
-    const { text, stats } = buildContextBlock(store, undefined, "how do I make commits");
+    const { text, stats } = await buildContextBlock(store, undefined, "how do I make commits");
     assert.ok(text.includes("Relevant Memory"));
     assert.ok(text.includes("commit"));
     assert.ok(stats.semantic > 0);
   });
 
-  it("selective: always includes lessons regardless of prompt", () => {
-    const { text } = buildContextBlock(store, undefined, "something totally unrelated xyz");
+  it("selective: always includes lessons regardless of prompt", async () => {
+    const { text } = await buildContextBlock(store, undefined, "something totally unrelated xyz");
     assert.ok(text.includes("Learned Corrections"));
     assert.ok(text.includes("DON'T:"));
   });
 
-  it("selective: filters lessons by relevance when config is selective", () => {
+  it("selective: filters lessons by relevance when config is selective", async () => {
     // Add lessons in different categories
     store.addLesson("Always verify exploit PoC before submission", "bug-bounty", "user", false);
     store.addLesson("Use conventional commits for all projects", "general", "user", false);
     store.addLesson("Never fabricate competitor claims in blog posts", "writing", "user", true);
 
     // With selective mode and a bug bounty prompt, should get bug-bounty + general lessons
     // FTS matches "bounty" against the category field and "exploit" against rule text
-    const { text: bbText } = buildContextBlock(store, undefined, "found an exploit on the bug bounty target", { lessonInjection: "selective" });
+    const { text: bbText } = await buildContextBlock(store, undefined, "found an exploit on the bug bounty target", { lessonInjection: "selective" });
     assert.ok(bbText.includes("verify exploit"), "should include bug-bounty lesson for bounty prompt");
     assert.ok(bbText.includes("conventional commits"), "should include general lessons");
 
     // With selective mode and a writing prompt, should get writing + general lessons
-    const { text: writeText } = buildContextBlock(store, undefined, "write a blog post about testing", { lessonInjection: "selective" });
+    const { text: writeText } = await buildContextBlock(store, undefined, "write a blog post about testing", { lessonInjection: "selective" });
     assert.ok(writeText.includes("fabricate"), "should include writing lesson for blog prompt");
     assert.ok(writeText.includes("conventional commits"), "should include general lessons");
   });
 
-  it("selective: mode 'all' still includes all lessons", () => {
-    const { text } = buildContextBlock(store, undefined, "something totally unrelated xyz", { lessonInjection: "all" });
+  it("selective: mode 'all' still includes all lessons", async () => {
+    const { text } = await buildContextBlock(store, undefined, "something totally unrelated xyz", { lessonInjection: "all" });
     assert.ok(text.includes("Learned Corrections"));
     assert.ok(text.includes("DON'T:"));
   });
 
-  it("selective: excludes other-project facts even when FTS text matches", () => {
+  it("selective: excludes other-project facts even when FTS text matches", async () => {
     // Simulate a scenario where a Prisma-related fact from project 'rise' could
     // match a prompt about prisma, but we're in a different project.
     store.setSemantic("project.rise.testing", "use fabricca (from @repo/prisma/testing) for fixtures", 0.95, "user");
     store.setSemantic("project.myapp.orm", "prisma with postgres", 0.95, "user");
 
     // In myapp context: prompt mentions prisma — should get myapp fact, not rise
-    const { text } = buildContextBlock(store, "/home/user/projects/myapp", "how do I set up prisma migrations");
+    const { text } = await buildContextBlock(store, "/home/user/projects/myapp", "how do I set up prisma migrations");
     assert.ok(text.includes("myapp.orm"), "should include current project prisma fact");
     assert.ok(!text.includes("rise.testing"), "should NOT include rise's prisma fact in myapp session");
   });
 
-  it("selective: includes project context when cwd matches", () => {
-    const { text } = buildContextBlock(store, "/workplace/samfp/Rosie", "how do I build");
+  it("selective: includes project context when cwd matches", async () => {
+    const { text } = await buildContextBlock(store, "/workplace/samfp/Rosie", "how do I build");
     // Should find rosie entries via project slug search
     assert.ok(text.includes("rosie"));
   });
 
-  it("selective: returns only lessons when prompt matches nothing", () => {
-    const { text, stats } = buildContextBlock(store, undefined, "zzzzqqqq xyzzy nonsense");
+  it("selective: returns only lessons when prompt matches nothing", async () => {
+    const { text, stats } = await buildContextBlock(store, undefined, "zzzzqqqq xyzzy nonsense");
     // No semantic hits, but lessons should still be there
     assert.ok(text.includes("Learned Corrections"));
     assert.equal(stats.semantic, 0);