Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,35 @@ Supported model families:
> since vectors are not cross-compatible between models. The prompt format is
> automatically adjusted for each model family.

### OpenAI Embedding Provider (Prototype)

QMD can also use OpenAI for embeddings while keeping query expansion and reranking
on the local GGUF models. For now, the provider is selected only through environment variables.

```sh
export QMD_EMBED_PROVIDER="openai"
export OPENAI_API_KEY="sk-..."
export QMD_EMBED_MODEL="text-embedding-3-small"

# Optional:
export QMD_OPENAI_BASE_URL="https://api.openai.com/v1"
export QMD_OPENAI_EMBED_DIMENSIONS="1024"
export OPENAI_ORG_ID="org-..."
export OPENAI_PROJECT_ID="proj_..."

qmd embed -f
```

Notes:
- `QMD_EMBED_PROVIDER=openai` switches only the embedding path. Reranking and query expansion remain local.
- `QMD_EMBED_MODEL` should be the OpenAI embedding model name when using the OpenAI provider.
- `QMD_OPENAI_BASE_URL` overrides the embeddings endpoint base URL. `OPENAI_BASE_URL` is also accepted.
- `QMD_OPENAI_EMBED_DIMENSIONS` becomes part of the embedding compatibility key, so changing it also requires `qmd embed -f`.
- Switching provider, model, or dimensions requires a full re-embed (`qmd embed -f`) because stored vectors are only compatible with the embedding configuration that produced them.

If you use YAML or SDK config, `models.embed` can hold the embedding model string,
but provider selection still comes from the environment in this prototype.

## Installation

```sh
Expand Down
128 changes: 78 additions & 50 deletions src/cli/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ import {
type ReindexResult,
type ChunkStrategy,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, getDefaultEmbeddingProvider, setDefaultLlamaCpp, setDefaultEmbeddingProvider, createEmbeddingProvider, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import {
formatSearchResults,
formatDocuments,
Expand Down Expand Up @@ -121,10 +121,15 @@ function getStore(): ReturnType<typeof createStore> {
const config = loadConfig();
syncConfigToDb(store.db, config);
if (config.models) {
setDefaultLlamaCpp(new LlamaCpp({
const llm = new LlamaCpp({
embedModel: config.models.embed,
generateModel: config.models.generate,
rerankModel: config.models.rerank,
});
setDefaultLlamaCpp(llm);
setDefaultEmbeddingProvider(createEmbeddingProvider({
embedModel: config.models.embed,
localProvider: llm,
}));
}
} catch {
Expand Down Expand Up @@ -234,10 +239,22 @@ function formatETA(seconds: number): string {
return `${Math.floor(seconds / 3600)}h ${Math.floor((seconds % 3600) / 60)}m`;
}

// Resolve the embedding identifier for a store: the provider's
// compatibilityKey when it is set, otherwise the provider's modelId.
// The provider is the store's own embeddingProvider, or the one returned by
// getDefaultEmbeddingProvider() when the store has none.
function resolveActiveEmbeddingModelForStore(store: ReturnType<typeof createStore>): string {
  const activeProvider = store.embeddingProvider ?? getDefaultEmbeddingProvider();
  const compatKey = activeProvider.compatibilityKey;
  if (compatKey !== null && compatKey !== undefined) {
    return compatKey;
  }
  // No compatibility key on this provider: fall back to the plain model id.
  return activeProvider.modelId;
}

// Resolve the modelId of the embedding provider in effect for a store
// (the value showStatus prints on its "Embedding:" line).
// Uses the store's own embeddingProvider, or the one returned by
// getDefaultEmbeddingProvider() when the store has none.
function resolveEmbeddingDisplayModelForStore(store: ReturnType<typeof createStore>): string {
  return (store.embeddingProvider ?? getDefaultEmbeddingProvider()).modelId;
}

// Check index health and print warnings/tips
function checkIndexHealth(db: Database): void {
const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
function checkIndexHealth(store: ReturnType<typeof createStore>): void {
const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(
store.db,
resolveActiveEmbeddingModelForStore(store),
);

// Warn if many docs need embedding
if (needsEmbedding > 0) {
Expand Down Expand Up @@ -317,7 +334,10 @@ function formatBytes(bytes: number): string {

async function showStatus(): Promise<void> {
const dbPath = getDbPath();
const db = getDb();
const store = getStore();
const db = store.db;
const activeEmbeddingModel = resolveActiveEmbeddingModelForStore(store);
const embeddingDisplayModel = resolveEmbeddingDisplayModelForStore(store);

// Collections are defined in YAML; no duplicate cleanup needed.
// Collections are defined in YAML; no duplicate cleanup needed.
Expand All @@ -335,7 +355,7 @@ async function showStatus(): Promise<void> {
// Overall stats
const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
const needsEmbedding = getHashesNeedingEmbedding(db);
const needsEmbedding = getHashesNeedingEmbedding(db, activeEmbeddingModel);

// Most recent update across all collections
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
Expand Down Expand Up @@ -462,7 +482,7 @@ async function showStatus(): Promise<void> {
return match ? `https://huggingface.co/${match[1]}` : uri;
};
console.log(`\n${c.bold}Models${c.reset}`);
console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
console.log(` Embedding: ${hfLink(embeddingDisplayModel)}`);
console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
}
Expand Down Expand Up @@ -622,7 +642,7 @@ async function updateCollections(): Promise<void> {
}

// Check if any documents need embedding (show once at end)
const needsEmbedding = getHashesNeedingEmbedding(db);
const needsEmbedding = getHashesNeedingEmbedding(db, resolveActiveEmbeddingModelForStore(storeInstance));
closeDb();

console.log(`${c.green}✓ All collections updated.${c.reset}`);
Expand Down Expand Up @@ -1514,6 +1534,8 @@ function collectionRename(oldName: string, newName: string): void {

async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false, ignorePatterns?: string[]): Promise<void> {
const db = getDb();
const storeInstance = getStore();
const activeModelKey = resolveActiveEmbeddingModelForStore(storeInstance);
const resolvedPwd = pwd || getPwd();
const now = new Date().toISOString();
const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
Expand Down Expand Up @@ -1635,7 +1657,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
const orphanedContent = cleanupOrphanedContent(db);

// Check if vector index needs updating
const needsEmbedding = getHashesNeedingEmbedding(db);
const needsEmbedding = getHashesNeedingEmbedding(db, activeModelKey);

progress.clear();
console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
Expand Down Expand Up @@ -1674,26 +1696,29 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
}

async function vectorIndex(
model: string = DEFAULT_EMBED_MODEL_URI,
model?: string,
force: boolean = false,
batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
): Promise<void> {
const storeInstance = getStore();
const db = storeInstance.db;
const embedProvider = storeInstance.embeddingProvider ?? getDefaultEmbeddingProvider();
const activeModelKey = model ?? embedProvider.compatibilityKey ?? embedProvider.modelId;
const displayModel = model ?? embedProvider.modelId;

if (force) {
console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
}

// Check if there's work to do before starting
const hashesToEmbed = getHashesNeedingEmbedding(db);
const hashesToEmbed = getHashesNeedingEmbedding(db, activeModelKey);
if (hashesToEmbed === 0 && !force) {
console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
closeDb();
return;
}

console.log(`${c.dim}Model: ${model}${c.reset}\n`);
console.log(`${c.dim}Model: ${displayModel}${c.reset}\n`);
if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
Expand Down Expand Up @@ -1803,6 +1828,7 @@ type OutputOptions = {
candidateLimit?: number; // Max candidates to rerank (default: 40)
intent?: string; // Domain intent for disambiguation
skipRerank?: boolean; // Skip LLM reranking, use RRF scores only
skipExpand?: boolean; // Skip query expansion, search only the original query
chunkStrategy?: ChunkStrategy; // "auto" (default) or "regex"
};

Expand Down Expand Up @@ -2284,47 +2310,46 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
const collectionNames = resolveCollectionFilter(opts.collection, true);
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;

checkIndexHealth(store.db);

await withLLMSession(async () => {
let results = await vectorSearchQuery(store, query, {
collection: singleCollection,
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0.3,
intent: opts.intent,
hooks: {
onExpand: (original, expanded) => {
logExpansionTree(original, expanded);
process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
},
checkIndexHealth(store);

let results = await vectorSearchQuery(store, query, {
collection: singleCollection,
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0.3,
intent: opts.intent,
skipExpand: opts.skipExpand,
hooks: opts.skipExpand ? undefined : {
onExpand: (original, expanded) => {
logExpansionTree(original, expanded);
process.stderr.write(`${c.dim}Searching ${expanded.length + 1} vector queries...${c.reset}\n`);
},
});
},
});

// Post-filter for multi-collection
if (collectionNames.length > 1) {
results = results.filter(r => {
const prefixes = collectionNames.map(n => `qmd://${n}/`);
return prefixes.some(p => r.file.startsWith(p));
});
}
// Post-filter for multi-collection
if (collectionNames.length > 1) {
results = results.filter(r => {
const prefixes = collectionNames.map(n => `qmd://${n}/`);
return prefixes.some(p => r.file.startsWith(p));
});
}

closeDb();
closeDb();

if (results.length === 0) {
printEmptySearchResults(opts.format);
return;
}
if (results.length === 0) {
printEmptySearchResults(opts.format);
return;
}

outputResults(results.map(r => ({
file: r.file,
displayPath: r.displayPath,
title: r.title,
body: r.body,
score: r.score,
context: r.context,
docid: r.docid,
})), query, { ...opts, limit: results.length });
}, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
outputResults(results.map(r => ({
file: r.file,
displayPath: r.displayPath,
title: r.title,
body: r.body,
score: r.score,
context: r.context,
docid: r.docid,
})), query, { ...opts, limit: results.length });
}

async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
Expand All @@ -2335,7 +2360,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
const collectionNames = resolveCollectionFilter(opts.collection, true);
const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined;

checkIndexHealth(store.db);
checkIndexHealth(store);

// Check for structured query syntax (lex:/vec:/hyde:/intent: prefixes)
const parsed = parseStructuredQuery(query);
Expand Down Expand Up @@ -2512,6 +2537,7 @@ function parseCLI() {
// Query options
"candidate-limit": { type: "string", short: "C" },
"no-rerank": { type: "boolean", default: false },
"no-expand": { type: "boolean", default: false },
intent: { type: "string" },
// Chunking options
"chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
Expand Down Expand Up @@ -2554,6 +2580,7 @@ function parseCLI() {
lineNumbers: !!values["line-numbers"],
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
skipRerank: !!values["no-rerank"],
skipExpand: !!values["no-expand"],
explain: !!values.explain,
intent: values.intent as string | undefined,
chunkStrategy: parseChunkStrategy(values["chunk-strategy"]),
Expand Down Expand Up @@ -2776,6 +2803,7 @@ function showHelp(): void {
console.log(" --full - Output full document instead of snippet");
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
console.log(" --no-expand - Skip local query expansion (search only the original query)");
console.log(" --line-numbers - Include line numbers in output");
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
console.log(" --files | --json | --csv | --md | --xml - Output format");
Expand Down Expand Up @@ -3107,7 +3135,7 @@ if (isMain) {
const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
await vectorIndex(undefined, !!cli.values.force, {
maxDocsPerBatch,
maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
chunkStrategy: embedChunkStrategy,
Expand Down
20 changes: 18 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import {
structuredSearch,
extractSnippet,
addLineNumbers,
DEFAULT_EMBED_MODEL,
DEFAULT_MULTI_GET_MAX_BYTES,
reindexCollection,
generateEmbeddings,
Expand Down Expand Up @@ -66,6 +65,7 @@ import {
} from "./store.js";
import {
LlamaCpp,
createEmbeddingProvider,
} from "./llm.js";
import {
setConfigSource,
Expand Down Expand Up @@ -375,6 +375,14 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
disposeModelsOnInactivity: true,
});
internal.llm = llm;
internal.embeddingProvider = createEmbeddingProvider({
embedModel: config?.models?.embed,
localProvider: llm,
});
const resolveActiveEmbedModel = () =>
internal.embeddingProvider?.compatibilityKey ??
internal.embeddingProvider?.modelId ??
llm.compatibilityKey;

const store: QMDStore = {
internal,
Expand Down Expand Up @@ -417,7 +425,15 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
});
},
searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
searchVector: async (q, opts) => internal.searchVec(q, DEFAULT_EMBED_MODEL, opts?.limit, opts?.collection),
searchVector: async (q, opts) => internal.searchVec(
q,
resolveActiveEmbedModel(),
opts?.limit,
opts?.collection,
undefined,
undefined,
internal.embeddingProvider,
),
expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
getDocumentBody: async (pathOrDocid, opts) => {
Expand Down
Loading