ccage-simp · ccage-simp · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/README.md b/README.md
@@ -515,6 +515,35 @@ Supported model families:
 > since vectors are not cross-compatible between models. The prompt format is
 > automatically adjusted for each model family.
 
+### OpenAI Embedding Provider (Prototype)
+
+QMD can also use OpenAI for embeddings while keeping query expansion and reranking
+on the local GGUF models. For now, the provider is selected through environment variables.
+
+```sh
+export QMD_EMBED_PROVIDER="openai"
+export OPENAI_API_KEY="sk-..."
+export QMD_EMBED_MODEL="text-embedding-3-small"
+
+# Optional:
+export QMD_OPENAI_BASE_URL="https://api.openai.com/v1"
+export QMD_OPENAI_EMBED_DIMENSIONS="1024"
+export OPENAI_ORG_ID="org-..."
+export OPENAI_PROJECT_ID="proj_..."
+
+qmd embed -f
+```
+
+Notes:
+- `QMD_EMBED_PROVIDER=openai` switches only the embedding path. Reranking and query expansion remain local.
+- `QMD_EMBED_MODEL` should be the OpenAI embedding model name when using the OpenAI provider.
+- `QMD_OPENAI_BASE_URL` overrides the embeddings endpoint base URL. `OPENAI_BASE_URL` is also accepted.
+- `QMD_OPENAI_EMBED_DIMENSIONS` becomes part of the embedding compatibility key, so changing it also requires `qmd embed -f`.
+- Switching provider, model, or dimensions requires a full re-embed (`qmd embed -f`) because stored vectors are only compatible with the embedding configuration that produced them.
+
+If you use YAML or SDK config, `models.embed` can hold the embedding model string,
+but provider selection still comes from the environment in this prototype.
+
 ## Installation
 
 ```sh

diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
@@ -78,7 +78,7 @@ import {
   type ReindexResult,
   type ChunkStrategy,
 } from "../store.js";
-import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
+import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, getDefaultEmbeddingProvider, setDefaultLlamaCpp, setDefaultEmbeddingProvider, createEmbeddingProvider, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
 import {
   formatSearchResults,
   formatDocuments,
@@ -121,10 +121,15 @@ function getStore(): ReturnType<typeof createStore> {
       const config = loadConfig();
       syncConfigToDb(store.db, config);
       if (config.models) {
-        setDefaultLlamaCpp(new LlamaCpp({
+        const llm = new LlamaCpp({
           embedModel: config.models.embed,
           generateModel: config.models.generate,
           rerankModel: config.models.rerank,
+        });
+        setDefaultLlamaCpp(llm);
+        setDefaultEmbeddingProvider(createEmbeddingProvider({
+          embedModel: config.models.embed,
+          localProvider: llm,
         }));
       }
     } catch {
@@ -234,10 +239,22 @@ function formatETA(seconds: number): string {
   return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
 }
 
+function resolveActiveEmbeddingModelForStore(store: ReturnType<typeof createStore>): string {
+  const active = store.embeddingProvider ?? getDefaultEmbeddingProvider(); // the store's own provider wins over the default one
+  return active.compatibilityKey ?? active.modelId; // fall back to the model id when the provider has no compatibility key
+}
+
+function resolveEmbeddingDisplayModelForStore(store: ReturnType<typeof createStore>): string {
+  // Display value: always the provider's model id, never its compatibility key.
+  return (store.embeddingProvider ?? getDefaultEmbeddingProvider()).modelId;
+}
 
 // Check index health and print warnings/tips
-function checkIndexHealth(db: Database): void {
-  const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
+function checkIndexHealth(store: ReturnType<typeof createStore>): void {
+  const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(
+    store.db,
+    resolveActiveEmbeddingModelForStore(store),
+  );
 
   // Warn if many docs need embedding
   if (needsEmbedding > 0) {
@@ -317,7 +334,10 @@ function formatBytes(bytes: number): string {
 
 async function showStatus(): Promise<void> {
   const dbPath = getDbPath();
-  const db = getDb();
+  const store = getStore();
+  const db = store.db;
+  const activeEmbeddingModel = resolveActiveEmbeddingModelForStore(store);
+  const embeddingDisplayModel = resolveEmbeddingDisplayModelForStore(store);
 
   // Collections are defined in YAML; no duplicate cleanup needed.
   // Collections are defined in YAML; no duplicate cleanup needed.
@@ -335,7 +355,7 @@ async function showStatus(): Promise<void> {
   // Overall stats
   const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
   const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
-  const needsEmbedding = getHashesNeedingEmbedding(db);
+  const needsEmbedding = getHashesNeedingEmbedding(db, activeEmbeddingModel);
 
   // Most recent update across all collections
   const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
@@ -462,7 +482,7 @@ async function showStatus(): Promise<void> {
       return match ? `https://huggingface.co/${match[1]}` : uri;
     };
     console.log(`\n${c.bold}Models${c.reset}`);
-    console.log(`  Embedding:   ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
+    console.log(`  Embedding:   ${hfLink(embeddingDisplayModel)}`);
     console.log(`  Reranking:   ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
     console.log(`  Generation:  ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
   }
@@ -622,7 +642,7 @@ async function updateCollections(): Promise<void> {
   }
 
   // Check if any documents need embedding (show once at end)
-  const needsEmbedding = getHashesNeedingEmbedding(db);
+  const needsEmbedding = getHashesNeedingEmbedding(db, resolveActiveEmbeddingModelForStore(storeInstance));
   closeDb();
 
   console.log(`${c.green}✓ All collections updated.${c.reset}`);
@@ -1514,6 +1534,8 @@ function collectionRename(oldName: string, newName: string): void {
 
 async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false, ignorePatterns?: string[]): Promise<void> {
   const db = getDb();
+  const storeInstance = getStore();
+  const activeModelKey = resolveActiveEmbeddingModelForStore(storeInstance);
   const resolvedPwd = pwd || getPwd();
   const now = new Date().toISOString();
   const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
@@ -1635,7 +1657,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
   const orphanedContent = cleanupOrphanedContent(db);
 
   // Check if vector index needs updating
-  const needsEmbedding = getHashesNeedingEmbedding(db);
+  const needsEmbedding = getHashesNeedingEmbedding(db, activeModelKey);
 
   progress.clear();
   console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
@@ -1674,26 +1696,29 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
 }
 
 async function vectorIndex(
-  model: string = DEFAULT_EMBED_MODEL_URI,
+  model?: string,
   force: boolean = false,
   batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
 ): Promise<void> {
   const storeInstance = getStore();
   const db = storeInstance.db;
+  const embedProvider = storeInstance.embeddingProvider ?? getDefaultEmbeddingProvider();
+  const activeModelKey = model ?? embedProvider.compatibilityKey ?? embedProvider.modelId;
+  const displayModel = model ?? embedProvider.modelId;
 
   if (force) {
     console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
   }
 
   // Check if there's work to do before starting
-  const hashesToEmbed = getHashesNeedingEmbedding(db);
+  const hashesToEmbed = getHashesNeedingEmbedding(db, activeModelKey);
   if (hashesToEmbed === 0 && !force) {
     console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
     closeDb();
     return;
   }
 
-  console.log(`${c.dim}Model: ${model}${c.reset}\n`);
+  console.log(`${c.dim}Model: ${displayModel}${c.reset}\n`);
   if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
     const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
     const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
@@ -1803,6 +1828,7 @@ type OutputOptions = {
   candidateLimit?: number;  // Max candidates to rerank (default: 40)
   intent?: string;       // Domain intent for disambiguation
   skipRerank?: boolean;  // Skip LLM reranking, use RRF scores only
+  skipExpand?: boolean;  // Skip query expansion, search only the original query
   chunkStrategy?: ChunkStrategy;  // "auto" (default) or "regex"
 };
 
@@ -2284,47 +2310,46 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
   const collectionNames = resolveCollectionFilter(opts.collection, true);
   const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
 
-  checkIndexHealth(store.db);
-
-  await withLLMSession(async () => {
-    let results = await vectorSearchQuery(store, query, {
-      collection: singleCollection,
-      limit: opts.all ? 500 : (opts.limit || 10),
-      minScore: opts.minScore || 0.3,
-      intent: opts.intent,
-      hooks: {
-        onExpand: (original, expanded) => {
-          logExpansionTree(original, expanded);
-          process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
-        },
+  checkIndexHealth(store);
+
+  let results = await vectorSearchQuery(store, query, {
+    collection: singleCollection,
+    limit: opts.all ? 500 : (opts.limit || 10),
+    minScore: opts.minScore || 0.3,
+    intent: opts.intent,
+    skipExpand: opts.skipExpand,
+    hooks: opts.skipExpand ? undefined : {
+      onExpand: (original, expanded) => {
+        logExpansionTree(original, expanded);
+        process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
       },
-    });
+    },
+  });
 
-    // Post-filter for multi-collection
-    if (collectionNames.length > 1) {
-      results = results.filter(r => {
-        const prefixes = collectionNames.map(n => `qmd://${n}/`);
-        return prefixes.some(p => r.file.startsWith(p));
-      });
-    }
+  // Post-filter for multi-collection
+  if (collectionNames.length > 1) {
+    results = results.filter(r => {
+      const prefixes = collectionNames.map(n => `qmd://${n}/`);
+      return prefixes.some(p => r.file.startsWith(p));
+    });
+  }
 
-    closeDb();
+  closeDb();
 
-    if (results.length === 0) {
-      printEmptySearchResults(opts.format);
-      return;
-    }
+  if (results.length === 0) {
+    printEmptySearchResults(opts.format);
+    return;
+  }
 
-    outputResults(results.map(r => ({
-      file: r.file,
-      displayPath: r.displayPath,
-      title: r.title,
-      body: r.body,
-      score: r.score,
-      context: r.context,
-      docid: r.docid,
-    })), query, { ...opts, limit: results.length });
-  }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
+  outputResults(results.map(r => ({
+    file: r.file,
+    displayPath: r.displayPath,
+    title: r.title,
+    body: r.body,
+    score: r.score,
+    context: r.context,
+    docid: r.docid,
+  })), query, { ...opts, limit: results.length });
 }
 
 async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
@@ -2335,7 +2360,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
   const collectionNames = resolveCollectionFilter(opts.collection, true);
   const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;
 
-  checkIndexHealth(store.db);
+  checkIndexHealth(store);
 
   // Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
   const parsed = parseStructuredQuery(query);
@@ -2512,6 +2537,7 @@ function parseCLI() {
       // Query options
       "candidate-limit": { type: "string", short: "C" },
       "no-rerank": { type: "boolean", default: false },
+      "no-expand": { type: "boolean", default: false },
       intent: { type: "string" },
       // Chunking options
       "chunk-strategy": { type: "string" },  // "regex" (default) or "auto" (AST for code files)
@@ -2554,6 +2580,7 @@ function parseCLI() {
     lineNumbers: !!values["line-numbers"],
     candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
     skipRerank: !!values["no-rerank"],
+    skipExpand: !!values["no-expand"],
     explain: !!values.explain,
     intent: values.intent as string | undefined,
     chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
@@ -2776,6 +2803,7 @@ function showHelp(): void {
   console.log("  --full                     - Output full document instead of snippet");
   console.log("  -C, --candidate-limit <n>  - Max candidates to rerank (default 40, lower = faster)");
   console.log("  --no-rerank                - Skip LLM reranking (use RRF scores only, much faster on CPU)");
+  console.log("  --no-expand                - Skip local query expansion (search only the original query)");
   console.log("  --line-numbers             - Include line numbers in output");
   console.log("  --explain                  - Include retrieval score traces (query --json/CLI)");
   console.log("  --files | --json | --csv | --md | --xml  - Output format");
@@ -3107,7 +3135,7 @@ if (isMain) {
         const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
         const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
         const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
-        await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
+        await vectorIndex(undefined, !!cli.values.force, {
           maxDocsPerBatch,
           maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
           chunkStrategy: embedChunkStrategy,

diff --git a/src/index.ts b/src/index.ts
@@ -23,7 +23,6 @@ import {
   structuredSearch,
   extractSnippet,
   addLineNumbers,
-  DEFAULT_EMBED_MODEL,
   DEFAULT_MULTI_GET_MAX_BYTES,
   reindexCollection,
   generateEmbeddings,
@@ -66,6 +65,7 @@ import {
 } from "./store.js";
 import {
   LlamaCpp,
+  createEmbeddingProvider,
 } from "./llm.js";
 import {
   setConfigSource,
@@ -375,6 +375,14 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
     disposeModelsOnInactivity: true,
   });
   internal.llm = llm;
+  internal.embeddingProvider = createEmbeddingProvider({
+    embedModel: config?.models?.embed,
+    localProvider: llm,
+  });
+  const resolveActiveEmbedModel = () => // evaluated on each call, so it reads whichever provider `internal` holds at that moment
+    internal.embeddingProvider?.compatibilityKey ?? // first choice: the provider's compatibility key
+    internal.embeddingProvider?.modelId ?? // next: the provider's model id
+    llm.compatibilityKey; // last resort: the compatibility key of the local `llm`
 
   const store: QMDStore = {
     internal,
@@ -417,7 +425,15 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
       });
     },
     searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
-    searchVector: async (q, opts) => internal.searchVec(q, DEFAULT_EMBED_MODEL, opts?.limit, opts?.collection),
+    searchVector: async (q, opts) => internal.searchVec(
+      q,
+      resolveActiveEmbedModel(),
+      opts?.limit,
+      opts?.collection,
+      undefined,
+      undefined,
+      internal.embeddingProvider,
+    ),
     expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
     get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
     getDocumentBody: async (pathOrDocid, opts) => {