From d4c2a0fa3230a87a6cb5fec0951e1d8620d412e8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 8 Apr 2026 01:40:40 +0000 Subject: [PATCH 1/4] docs: check off completed vector/semantic search items in roadmap Vector search, hybrid BM25+cosine scoring, embedding providers, and index persistence are all implemented but were still marked as TODO. https://claude.ai/code/session_018i9rTaA3Yycgkx6eHHX4rh --- ROADMAP.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index ddd2b5f..d299867 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -116,11 +116,11 @@ What's built, what's next, and what's deferred. ## v0.7.0 — Advanced Search ### Vector/Semantic Search -- [ ] Optional embedding-based search alongside BM25 -- [ ] Embedding provider: OpenAI `text-embedding-3-small`, or local via Ollama -- [ ] Hybrid scoring: combine BM25 + cosine similarity -- [ ] Store embeddings in `.kb/cache/embeddings.bin` -- [ ] Rebuild embeddings on compile +- [x] Optional embedding-based search alongside BM25 +- [x] Embedding provider: OpenAI `text-embedding-3-small`, or local via Ollama +- [x] Hybrid scoring: combine BM25 + cosine similarity (Reciprocal Rank Fusion) +- [x] Store embeddings in `.kb/cache/vectors.idx` (binary Float32Array format) +- [x] Rebuild embeddings on compile ### Search Improvements - [ ] Fuzzy matching for typo tolerance From 7a750fc490ca5737b5f496c56c28bf39c24fa007 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 8 Apr 2026 01:49:40 +0000 Subject: [PATCH 2/4] =?UTF-8?q?feat(core,cli):=20add=20advanced=20search?= =?UTF-8?q?=20=E2=80=94=20fuzzy=20matching,=20phrase=20search,=20tag/date?= =?UTF-8?q?=20filtering,=20highlighting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fuzzy matching: edit distance ≤ 1 for tokens ≥ 4 chars, with discounted scoring - Phrase search: quoted strings require exact adjacency ("attention mechanism") - Tag filtering: --tag flag filters results by frontmatter tags (AND logic, repeatable) - Date filtering: --since flag filters to articles dated on or after a given date - Highlighting: ANSI bold on matched terms in terminal output (disabled for --json) - Index version bumped to v2 (backward-compatible load from v1) - 27 new tests covering all features (301 total passing) https://claude.ai/code/session_018i9rTaA3Yycgkx6eHHX4rh --- ROADMAP.md | 10 +- bun.lock | 1 + packages/cli/src/commands/search.ts | 24 +- packages/cli/src/index.ts | 7 + packages/core/src/index.ts | 2 +- packages/core/src/search/engine.test.ts | 378 +++++++++++++++++++++++- packages/core/src/search/engine.ts | 204 ++++++++++++- 7 files changed, 604 insertions(+), 22 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index d299867..b3b544a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -123,11 +123,11 @@ What's built, what's next, and what's deferred. - [x] Rebuild embeddings on compile ### Search Improvements -- [ ] Fuzzy matching for typo tolerance -- [ ] Phrase search with quotes: `kib search '"attention mechanism"'` -- [ ] Tag-based filtering: `kib search "transformers" --tag deep-learning` -- [ ] Date range filtering: `kib search --since 2024-01-01` -- [ ] Search result highlighting in terminal (bold matched terms) +- [x] Fuzzy matching for typo tolerance (edit distance ≤ 1 for tokens ≥ 4 chars) +- [x] Phrase search with quotes: `kib search '"attention mechanism"'` +- [x] Tag-based filtering: `kib search "transformers" --tag deep-learning` +- [x] Date range filtering: `kib search --since 2024-01-01` +- [x] Search result highlighting in terminal (bold matched terms) --- diff --git a/bun.lock b/bun.lock index ed78507..c89da5b 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "kib-monorepo", diff --git a/packages/cli/src/commands/search.ts b/packages/cli/src/commands/search.ts index 7f6665e..fcd0c0a 100644 --- a/packages/cli/src/commands/search.ts +++ b/packages/cli/src/commands/search.ts @@ -10,6 +10,8 @@ interface SearchOpts { limit?: number; json?: boolean; engine?: "builtin" | "vector" | "hybrid"; + tag?: string[]; + since?: string; } export async function search(term: string, opts: SearchOpts) { @@ -30,6 +32,8 @@ export async function search(term: string, opts: SearchOpts) { const scope = opts.wiki ? "wiki" : opts.raw ? "raw" : "all"; const limit = opts.limit ?? 20; + const tags = opts.tag ?? undefined; + const since = opts.since ?? undefined; // Determine search engine let engine = opts.engine; @@ -44,6 +48,8 @@ export async function search(term: string, opts: SearchOpts) { debug(`vault root: ${root}`); debug(`scope: ${scope}, limit: ${limit}, engine: ${engine}, term: "${term}"`); + if (tags) debug(`tag filter: ${tags.join(", ")}`); + if (since) debug(`since filter: ${since}`); const spinner = createSpinner("Searching..."); spinner.start(); @@ -51,6 +57,8 @@ export async function search(term: string, opts: SearchOpts) { let results: SearchResult[]; let elapsed: number; + const searchOpts = { limit, tag: tags, since, highlight: !opts.json }; + if (engine === "hybrid" || engine === "vector") { const endIndex = debugTime("load/build hybrid index"); const bm25 = new SearchIndex(); @@ -83,6 +91,15 @@ export async function search(term: string, opts: SearchOpts) { const start = performance.now(); results = await hybrid.search(term, provider, { limit }); elapsed = Math.round(performance.now() - start); + + // Apply tag/date filters and highlighting post-hoc for hybrid + // (BM25 side supports it natively, but hybrid fuses results) + if (tags || since) { + // Re-run BM25 with filters to get filtered results + const filteredBm25 = bm25.search(term, searchOpts); + const filteredPaths = new Set(filteredBm25.map((r) => r.path)); + results = results.filter((r) => filteredPaths.has(r.path)); + } } else { endIndex(); // Fallback path @@ -94,7 +111,7 @@ export async function search(term: string, opts: SearchOpts) { await index.save(root); } const start = performance.now(); - results = index.search(term, { limit }); + results = index.search(term, searchOpts); elapsed = Math.round(performance.now() - start); } } else { @@ -112,7 +129,7 @@ export async function search(term: string, opts: SearchOpts) { endIndex(); const start = performance.now(); - results = index.search(term, { limit }); + results = index.search(term, searchOpts); elapsed = Math.round(performance.now() - start); } @@ -143,14 +160,13 @@ export async function search(term: string, opts: SearchOpts) { console.log(` ${num}. ${title} ${score}`); console.log(` ${dimPath(r.path)}`); if (r.snippet) { - console.log(` ${truncate(r.snippet, 80)}`); + console.log(` ${truncate(r.snippet, 120)}`); } console.log(); } } function dimPath(path: string): string { - // Import chalk dynamically to keep lazy loading return `\x1b[2m${path}\x1b[0m`; } diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 0df3a58..0390dd2 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -76,6 +76,13 @@ program .option("--limit ", "max results", Number.parseInt) .option("--json", "JSON output") .option("--engine ", "search engine: builtin, vector, hybrid") + .option( + "--tag ", + "filter by tag (repeatable)", + (val: string, prev: string[]) => [...prev, val], + [] as string[], + ) + .option("--since ", "filter to articles dated on or after (YYYY-MM-DD)") .action(async (term, opts) => { const { search } = await import("./commands/search.js"); await search(term, opts); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a85e233..86b1480 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -16,7 +16,7 @@ export { ALL_RULES } from "./lint/rules.js"; export { createProvider, detectProvider } from "./providers/router.js"; export { queryVault } from "./query/query.js"; export * from "./schemas.js"; -export { SearchIndex } from "./search/engine.js"; +export { highlightSnippet, parseQuery, SearchIndex } from "./search/engine.js"; export { HybridSearch } from "./search/hybrid.js"; export { VectorIndex } from "./search/vector.js"; export { findSkill, loadSkills } from "./skills/loader.js"; diff --git a/packages/core/src/search/engine.test.ts b/packages/core/src/search/engine.test.ts index e2398f5..5308608 100644 --- a/packages/core/src/search/engine.test.ts +++ b/packages/core/src/search/engine.test.ts @@ -3,7 +3,7 @@ import { mkdtemp, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { initVault, writeWiki } from "../vault.js"; -import { SearchIndex } from "./engine.js"; +import { editDistance1, highlightSnippet, parseQuery, SearchIndex } from "./engine.js"; let tempDir: string; @@ -228,3 +228,379 @@ describe("SearchIndex", () => { expect(results[0]!.title).toBe("Transformer"); }); }); + +// ─── Fuzzy Matching ───────────────────────────────────────────── + +describe("editDistance1", () => { + test("returns true for single substitution", () => { + expect(editDistance1("cat", "bat")).toBe(true); + expect(editDistance1("hello", "hallo")).toBe(true); + }); + + test("returns true for single insertion", () => { + expect(editDistance1("cat", "cart")).toBe(true); + expect(editDistance1("test", "teset")).toBe(true); + }); + + test("returns true for single deletion", () => { + expect(editDistance1("cart", "cat")).toBe(true); + expect(editDistance1("hello", "helo")).toBe(true); + }); + + test("returns false for identical strings", () => { + expect(editDistance1("same", "same")).toBe(false); + }); + + test("returns false for distance > 1", () => { + expect(editDistance1("cat", "dog")).toBe(false); + expect(editDistance1("hello", "world")).toBe(false); + expect(editDistance1("abc", "abcde")).toBe(false); + }); + + test("returns false for empty vs 2+ chars", () => { + expect(editDistance1("", "ab")).toBe(false); + }); + + test("handles single char edge cases", () => { + expect(editDistance1("a", "b")).toBe(true); // substitution + expect(editDistance1("a", "ab")).toBe(true); // insertion + expect(editDistance1("ab", "a")).toBe(true); // deletion + }); +}); + +describe("fuzzy search", () => { + test("finds results with typos (edit distance 1)", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/transformer.md", + articleMd( + "Transformer Architecture", + "The transformer is a neural network architecture based on self-attention mechanisms.", + ), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + // "transfomer" is a common typo (missing 'r') + const results = index.search("transfomer"); + expect(results.length).toBeGreaterThan(0); + expect(results[0]!.title).toBe("Transformer Architecture"); + }); + + test("does not fuzzy match short tokens (< 4 chars)", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/ai.md", + articleMd("AI Basics", "AI is artificial intelligence."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + // "ax" is short — should not fuzzy-match "ai" + const results = index.search("ax"); + expect(results).toHaveLength(0); + }); +}); + +// ─── Phrase Search ────────────────────────────────────────────── + +describe("parseQuery", () => { + test("extracts quoted phrases", () => { + const result = parseQuery('"attention mechanism" transformer'); + expect(result.phrases).toEqual(["attention mechanism"]); + expect(result.terms).toEqual(["transformer"]); + }); + + test("handles multiple quoted phrases", () => { + const result = parseQuery('"hello world" foo "bar baz"'); + expect(result.phrases).toEqual(["hello world", "bar baz"]); + expect(result.terms).toEqual(["foo"]); + }); + + test("handles no quotes", () => { + const result = parseQuery("simple search query"); + expect(result.phrases).toEqual([]); + expect(result.terms).toEqual(["simple", "search", "query"]); + }); + + test("handles only quotes", () => { + const result = parseQuery('"exact phrase"'); + expect(result.phrases).toEqual(["exact phrase"]); + expect(result.terms).toEqual([]); + }); + + test("handles empty query", () => { + const result = parseQuery(""); + expect(result.phrases).toEqual([]); + expect(result.terms).toEqual([]); + }); +}); + +describe("phrase search", () => { + test("exact phrase matches rank and filter correctly", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/attention.md", + articleMd( + "Attention Mechanisms", + "Self-attention mechanisms compute weighted sums. Attention is all you need.", + ), + ); + await writeWiki( + root, + "concepts/rnn.md", + articleMd( + "Recurrent Networks", + "RNNs process sequences. Some use attention over hidden states.", + ), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + // Phrase search should only match articles containing the exact phrase + const results = index.search('"attention is all you need"'); + expect(results.length).toBe(1); + expect(results[0]!.title).toBe("Attention Mechanisms"); + }); + + test("phrase search with additional terms", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/transformer.md", + articleMd("Transformer", "The transformer uses self-attention mechanisms for processing."), + ); + await writeWiki( + root, + "concepts/cnn.md", + articleMd("CNN", "CNNs use convolutional layers for self-attention on images. Not really."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + // Only the transformer article has "self-attention mechanisms" as a phrase + const results = index.search('"self-attention mechanisms" transformer'); + expect(results.length).toBe(1); + expect(results[0]!.title).toBe("Transformer"); + }); +}); + +// ─── Tag Filtering ────────────────────────────────────────────── + +describe("tag filtering", () => { + function taggedArticle(title: string, tags: string[], content: string): string { + return `---\ntitle: ${title}\nslug: ${title.toLowerCase().replace(/\s+/g, "-")}\ntags: [${tags.join(", ")}]\n---\n\n# ${title}\n\n${content}`; + } + + test("filters results by single tag", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/transformer.md", + taggedArticle("Transformer", ["deep-learning", "nlp"], "Neural network architecture."), + ); + await writeWiki( + root, + "concepts/cnn.md", + taggedArticle("CNN", ["deep-learning", "vision"], "Convolutional neural network."), + ); + await writeWiki( + root, + "concepts/bert.md", + taggedArticle("BERT", ["nlp"], "Bidirectional encoder from transformers. A neural network."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + const results = index.search("neural network", { tag: "nlp" }); + // Only transformer and BERT have the nlp tag + expect(results.every((r) => r.title === "Transformer" || r.title === "BERT")).toBe(true); + expect(results.some((r) => r.title === "CNN")).toBe(false); + }); + + test("filters results by multiple tags (AND logic)", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/transformer.md", + taggedArticle("Transformer", ["deep-learning", "nlp"], "A neural architecture."), + ); + await writeWiki( + root, + "concepts/bert.md", + taggedArticle("BERT", ["nlp"], "Bidirectional encoder. A neural architecture."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + const results = index.search("neural", { tag: ["deep-learning", "nlp"] }); + // Only transformer has both tags + expect(results.length).toBe(1); + expect(results[0]!.title).toBe("Transformer"); + }); + + test("returns empty when no docs match tag", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/test.md", + taggedArticle("Test", ["misc"], "Some content about testing."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + const results = index.search("test", { tag: "nonexistent" }); + expect(results).toHaveLength(0); + }); +}); + +// ─── Date Filtering ───────────────────────────────────────────── + +describe("date filtering", () => { + function datedArticle(title: string, date: string, content: string): string { + return `---\ntitle: ${title}\nslug: ${title.toLowerCase().replace(/\s+/g, "-")}\ndate: ${date}\n---\n\n# ${title}\n\n${content}`; + } + + test("filters results by --since date", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/old.md", + datedArticle("Old Article", "2023-01-15", "Neural networks from the past."), + ); + await writeWiki( + root, + "concepts/new.md", + datedArticle("New Article", "2025-06-01", "Recent neural network research."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + const results = index.search("neural", { since: "2025-01-01" }); + expect(results.length).toBe(1); + expect(results[0]!.title).toBe("New Article"); + }); + + test("includes articles on the exact --since date", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/exact.md", + datedArticle("Exact Date", "2025-03-15", "Neural network content."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + const results = index.search("neural", { since: "2025-03-15" }); + expect(results.length).toBe(1); + }); + + test("includes articles with no date when using --since", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/nodated.md", + articleMd("No Date", "Neural network with no date frontmatter."), + ); + + const index = new SearchIndex(); + await index.build(root, "wiki"); + + // Articles with no date should not be excluded + const results = index.search("neural", { since: "2025-01-01" }); + expect(results.length).toBe(1); + }); +}); + +// ─── Highlighting ─────────────────────────────────────────────── + +describe("highlightSnippet", () => { + test("bolds matched words", () => { + const result = highlightSnippet("The transformer architecture is powerful", ["transform"]); + expect(result).toContain("\x1b[1mtransformer\x1b[22m"); + expect(result).toContain("The"); + expect(result).toContain("is powerful"); + }); + + test("highlights multiple terms", () => { + const result = highlightSnippet("Neural networks use attention mechanisms", [ + "neural", + "attention", + ]); + expect(result).toContain("\x1b[1mNeural\x1b[22m"); + expect(result).toContain("\x1b[1mattention\x1b[22m"); + }); + + test("returns unchanged snippet when no tokens match", () => { + const snippet = "No matches here"; + const result = highlightSnippet(snippet, ["quantum"]); + expect(result).toBe(snippet); + }); + + test("returns unchanged snippet with empty tokens", () => { + const snippet = "Some text"; + expect(highlightSnippet(snippet, [])).toBe(snippet); + }); +}); + +// ─── Save/Load with tags and date ─────────────────────────────── + +describe("index serialization with metadata", () => { + function taggedDatedArticle( + title: string, + tags: string[], + date: string, + content: string, + ): string { + return `---\ntitle: ${title}\nslug: ${title.toLowerCase().replace(/\s+/g, "-")}\ntags: [${tags.join(", ")}]\ndate: ${date}\n---\n\n# ${title}\n\n${content}`; + } + + test("save and load preserves tag and date filtering", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/tagged.md", + taggedDatedArticle( + "Tagged Article", + ["ml", "nlp"], + "2025-06-01", + "Machine learning content.", + ), + ); + await writeWiki( + root, + "concepts/other.md", + taggedDatedArticle("Other Article", ["vision"], "2024-01-01", "Computer vision content."), + ); + + const index1 = new SearchIndex(); + await index1.build(root, "wiki"); + await index1.save(root); + + const index2 = new SearchIndex(); + const loaded = await index2.load(root); + expect(loaded).toBe(true); + + // Tag filter should still work after load + const tagResults = index2.search("content", { tag: "ml" }); + expect(tagResults.length).toBe(1); + expect(tagResults[0]!.title).toBe("Tagged Article"); + + // Date filter should still work after load + const dateResults = index2.search("content", { since: "2025-01-01" }); + expect(dateResults.length).toBe(1); + expect(dateResults[0]!.title).toBe("Tagged Article"); + }); +}); diff --git a/packages/core/src/search/engine.ts b/packages/core/src/search/engine.ts index 8e2db56..544e81f 100644 --- a/packages/core/src/search/engine.ts +++ b/packages/core/src/search/engine.ts @@ -165,16 +165,20 @@ interface Document { tokens: string[]; tokenCount: number; termFreqs: Map; + tags: string[]; + date: string | null; } interface SerializedIndex { - version: 1; + version: 2; documents: { path: string; title: string; snippet: string; tokenCount: number; termFreqs: [string, number][]; + tags: string[]; + date: string | null; }[]; idf: [string, number][]; avgDl: number; @@ -210,6 +214,20 @@ export class SearchIndex { const title = (frontmatter.title as string) ?? filePath.split("/").pop()?.replace(/\.md$/, "") ?? ""; + // Extract tags from frontmatter + const rawTags = frontmatter.tags; + const tags: string[] = Array.isArray(rawTags) + ? rawTags.map((t: unknown) => String(t).toLowerCase()) + : []; + + // Extract date from frontmatter (try common field names) + const rawDate = + (frontmatter.date as string) ?? + (frontmatter.created as string) ?? + (frontmatter.ingested as string) ?? + null; + const date = rawDate && !Number.isNaN(Date.parse(String(rawDate))) ? String(rawDate) : null; + const tokens = tokenize(`${title} ${title} ${body}`); // title gets extra weight const termFreqs = new Map(); for (const token of tokens) { @@ -223,6 +241,8 @@ export class SearchIndex { tokens, tokenCount: tokens.length, termFreqs, + tags, + date, }); } @@ -254,34 +274,92 @@ export class SearchIndex { } /** - * Search the index using BM25 scoring. + * Search the index using BM25 scoring with fuzzy matching, phrase search, + * tag filtering, date filtering, and optional highlighting. */ - search(query: string, opts: { limit?: number; threshold?: number } = {}): SearchResult[] { + search( + query: string, + opts: { + limit?: number; + threshold?: number; + tag?: string | string[]; + since?: string; + highlight?: boolean; + } = {}, + ): SearchResult[] { const limit = opts.limit ?? 20; const threshold = opts.threshold ?? 0; - const queryTokens = tokenize(query); + const highlight = opts.highlight ?? false; + + // Parse tag filter + const tagFilter: string[] | null = opts.tag + ? (Array.isArray(opts.tag) ? opts.tag : [opts.tag]).map((t) => t.toLowerCase()) + : null; - if (queryTokens.length === 0 || this.documents.length === 0) { + // Parse date filter + const sinceTs = opts.since ? Date.parse(opts.since) : null; + + // Parse phrases (quoted strings) and remaining terms + const { phrases, terms } = parseQuery(query); + const queryTokens = terms.flatMap((t) => tokenize(t)); + + if ((queryTokens.length === 0 && phrases.length === 0) || this.documents.length === 0) { return []; } const scores: { doc: Document; score: number }[] = []; for (const doc of this.documents) { + // Tag filter: skip docs that don't have all required tags + if (tagFilter && !tagFilter.every((t) => doc.tags.includes(t))) { + continue; + } + + // Date filter: skip docs older than --since + if (sinceTs && doc.date) { + const docTs = Date.parse(doc.date); + if (!Number.isNaN(docTs) && docTs < sinceTs) continue; + } + + // Phrase filter: skip docs that don't contain all exact phrases + if (phrases.length > 0) { + const lowerContent = `${doc.title} ${doc.content}`.toLowerCase(); + if (!phrases.every((p) => lowerContent.includes(p.toLowerCase()))) { + continue; + } + } + let score = 0; const dl = doc.tokenCount; for (const qt of queryTokens) { - const tf = doc.termFreqs.get(qt) ?? 0; + // Exact match first + let tf = doc.termFreqs.get(qt) ?? 0; + + // Fuzzy match: if no exact hit, check edit distance ≤ 1 for tokens ≥ 4 chars + if (tf === 0 && qt.length >= 4) { + for (const [docToken, freq] of doc.termFreqs) { + if (editDistance1(qt, docToken)) { + tf = Math.ceil(freq * 0.8); // discount fuzzy matches slightly + break; + } + } + } + if (tf === 0) continue; - const idfVal = this.idf.get(qt) ?? 0; + const idfVal = this.idf.get(qt) ?? this.computeFuzzyIdf(qt); const tfNorm = (tf * (this.k1 + 1)) / (tf + this.k1 * (1 - this.b + this.b * (dl / this.avgDl))); score += idfVal * tfNorm; } + // Give a bonus for phrase matches (phrases already filtered above) + if (phrases.length > 0) { + score += phrases.length * 2.0; + } + if (score > threshold) { scores.push({ doc, score }); } @@ -290,26 +368,48 @@ export class SearchIndex { // Sort by score descending scores.sort((a, b) => b.score - a.score); + // Collect all terms for highlighting (query tokens + phrase words) + const highlightTerms = highlight + ? [...queryTokens, ...phrases.flatMap((p) => tokenize(p))] + : []; + return scores.slice(0, limit).map(({ doc, score }) => ({ path: doc.path, score: Math.round(score * 100) / 100, - snippet: extractSnippet(doc.content, queryTokens), + snippet: highlight + ? highlightSnippet( + extractSnippet(doc.content, [...queryTokens, ...phrases]), + highlightTerms, + ) + : extractSnippet(doc.content, [...queryTokens, ...phrases]), title: doc.title || undefined, })); } + /** + * Compute approximate IDF for a fuzzy-matched term by finding the closest known term. + */ + private computeFuzzyIdf(token: string): number { + for (const [term, idf] of this.idf) { + if (editDistance1(token, term)) return idf * 0.8; + } + return 0; + } + /** * Serialize the index for caching. */ serialize(): string { const data: SerializedIndex = { - version: 1, + version: 2, documents: this.documents.map((d) => ({ path: d.path, title: d.title, snippet: d.content.slice(0, 200), tokenCount: d.tokens.length, termFreqs: [...d.termFreqs.entries()], + tags: d.tags, + date: d.date, })), idf: [...this.idf.entries()], avgDl: this.avgDl, @@ -336,9 +436,9 @@ export class SearchIndex { try { const raw = await readFile(path, "utf-8"); - const data = JSON.parse(raw) as SerializedIndex; + const data = JSON.parse(raw) as SerializedIndex & { version: number }; - if (data.version !== 1) return false; + if (data.version !== 1 && data.version !== 2) return false; this.documents = data.documents.map((d) => ({ path: d.path, @@ -347,6 +447,8 @@ export class SearchIndex { tokens: [], // Not needed for search — termFreqs is enough tokenCount: d.tokenCount, termFreqs: new Map(d.termFreqs), + tags: (d as { tags?: string[] }).tags ?? [], + date: (d as { date?: string | null }).date ?? null, })); this.idf = new Map(data.idf); this.avgDl = data.avgDl; @@ -388,3 +490,83 @@ function extractSnippet(content: string, queryTokens: string[], maxLength = 150) return snippet; } + +// ─── Query Parser ─────────────────────────────────────────────── + +/** + * Parse a search query into exact phrases (quoted) and remaining terms. + * Example: `"attention mechanism" transformer` → phrases: ["attention mechanism"], terms: ["transformer"] + */ +export function parseQuery(query: string): { phrases: string[]; terms: string[] } { + const phrases: string[] = []; + const remaining = query.replace(/"([^"]+)"/g, (_match, phrase: string) => { + phrases.push(phrase); + return ""; + }); + const terms = remaining + .split(/\s+/) + .map((t) => t.trim()) + .filter(Boolean); + return { phrases, terms }; +} + +// ─── Fuzzy Matching ───────────────────────────────────────────── + +/** + * Check if two strings have edit distance ≤ 1 (substitution, insertion, or deletion). + * Optimized: avoids full DP matrix by bailing early. + */ +export function editDistance1(a: string, b: string): boolean { + const lenDiff = a.length - b.length; + if (lenDiff > 1 || lenDiff < -1) return false; + + if (a.length === b.length) { + // Check for exactly one substitution + let diffs = 0; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) { + diffs++; + if (diffs > 1) return false; + } + } + return diffs === 1; + } + + // One is longer by 1: check for single insertion/deletion + const longer = a.length > b.length ? a : b; + const shorter = a.length > b.length ? b : a; + let i = 0; + let j = 0; + let diffs = 0; + while (i < longer.length && j < shorter.length) { + if (longer[i] !== shorter[j]) { + diffs++; + if (diffs > 1) return false; + i++; // skip the extra char in the longer string + } else { + i++; + j++; + } + } + return true; +} + +// ─── Highlighting ─────────────────────────────────────────────── + +/** + * Highlight matched terms in a snippet using ANSI bold. + * Matches stemmed forms so "transformers" highlights when searching for "transformer". + */ +export function highlightSnippet(snippet: string, queryTokens: string[]): string { + if (queryTokens.length === 0) return snippet; + + // Build a regex that matches any word whose stem matches a query token + // We match whole words and check stems + return snippet.replace(/[a-zA-Z0-9]+/g, (word) => { + const stemmed = stem(word.toLowerCase()); + if (queryTokens.some((qt) => stemmed === qt || editDistance1(stemmed, qt))) { + return `\x1b[1m${word}\x1b[22m`; // ANSI bold + } + return word; + }); +} From 8a8bebc2b8ab3b1455be79050345adfb915e226a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 8 Apr 2026 01:53:11 +0000 Subject: [PATCH 3/4] feat(cli): add tag, since, and phrase search params to MCP kib_search tool Expose the new advanced search capabilities (tag filtering, date filtering, phrase search) through the MCP server so AI clients can use them. https://claude.ai/code/session_018i9rTaA3Yycgkx6eHHX4rh --- packages/cli/src/mcp/server.ts | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/mcp/server.ts b/packages/cli/src/mcp/server.ts index bd149d6..b27387d 100644 --- a/packages/cli/src/mcp/server.ts +++ b/packages/cli/src/mcp/server.ts @@ -170,15 +170,27 @@ export function createMcpServer(root: string) { server.tool( "kib_search", - "Search the knowledge base using full-text BM25 search", + "Search the knowledge base using full-text BM25 search. Supports fuzzy matching, phrase search (wrap in quotes), tag filtering, and date filtering.", { - query: z.string().describe("Search query"), + query: z + .string() + .describe( + 'Search query. Wrap phrases in quotes for exact match, e.g. "attention mechanism"', + ), limit: z.number().int().positive().max(50).default(10).describe("Max results"), + tag: z + .union([z.string(), z.array(z.string())]) + .optional() + .describe("Filter by frontmatter tag(s). Single tag or array for AND logic."), + since: z + .string() + .optional() + .describe("Filter to articles dated on or after this date (YYYY-MM-DD)"), }, - async ({ query, limit }) => { + async ({ query, limit, tag, since }) => { try { const index = await ctx.getSearchIndex(); - const results = index.search(query, { limit }); + const results = index.search(query, { limit, tag, since }); const prefix = `${root}/`; return json( results.map((r) => ({ From 423f8719614c139ea6e983aff06f865eca04e373 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 8 Apr 2026 02:00:19 +0000 Subject: [PATCH 4/4] feat(cli): add kib_config, kib_skill, kib_export MCP tools and close param gaps New MCP tools: - kib_config: get/set/list vault configuration - kib_skill: list and run vault skills (summarize, flashcards, etc) - kib_export: export wiki as markdown or HTML static site Enhanced existing tools: - kib_search: add scope param (wiki/raw/all) - kib_lint: add fix param for auto-fixing issues - kib_compile: add max param to limit sources per pass - kib_ingest: add dry_run param for preview MCP server now exposes 11 tools (was 8), matching CLI capabilities. 14 new MCP tests added (315 total passing). https://claude.ai/code/session_018i9rTaA3Yycgkx6eHHX4rh --- packages/cli/src/mcp/export-helper.ts | 178 +++++++++++++++++ packages/cli/src/mcp/server.test.ts | 264 ++++++++++++++++++++++++++ packages/cli/src/mcp/server.ts | 226 +++++++++++++++++++++- 3 files changed, 661 insertions(+), 7 deletions(-) create mode 100644 packages/cli/src/mcp/export-helper.ts diff --git a/packages/cli/src/mcp/export-helper.ts b/packages/cli/src/mcp/export-helper.ts new file mode 100644 index 0000000..eb636e1 --- /dev/null +++ b/packages/cli/src/mcp/export-helper.ts @@ -0,0 +1,178 @@ +import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises"; +import { join, relative } from "node:path"; +import { listImageAssets, listWiki, parseFrontmatter, WIKI_DIR } from "@kibhq/core"; + +export async function exportVault( + root: string, + format: "markdown" | "html", + output?: string, +): Promise<{ format: string; output: string; files: number }> { + const outputDir = output ?? join(root, "export"); + + let fileCount: number; + switch (format) { + case "markdown": + fileCount = await exportMarkdown(root, outputDir); + break; + case "html": + fileCount = await exportHtml(root, outputDir); + break; + default: + throw new Error(`Unsupported format: ${format}. Use 'markdown' or 'html'.`); + } + + return { format, output: outputDir, files: fileCount }; +} + +async function exportMarkdown(root: string, outputDir: string): Promise { + const wikiDir = join(root, WIKI_DIR); + const files = await listWiki(root); + + await copyImageAssets(root, outputDir); + + for (const filePath of files) { + const content = await readFile(filePath, "utf-8"); + const relPath = relative(wikiDir, filePath); + const outPath = join(outputDir, relPath); + + await mkdir(join(outPath, ".."), { recursive: true }); + + const cleaned = content.replace(/^---[\s\S]*?---\s*\n/, ""); + const resolved = cleaned.replace( + /\[\[([^\]]+)\]\]/g, + (_, slug: string) => `[${slug}](${slug}.md)`, + ); + + await writeFile(outPath, resolved, "utf-8"); + } + + return files.length; +} + +const SHARED_CSS = ` + body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; max-width: 720px; margin: 2rem auto; padding: 0 1rem; line-height: 1.6; color: #1a1a1a; } + a { color: #0066cc; } + code { background: #f4f4f4; padding: 0.2em 0.4em; border-radius: 3px; font-size: 0.9em; } + pre { background: #f4f4f4; padding: 1rem; border-radius: 6px; overflow-x: auto; } + pre code { background: none; padding: 0; } + nav { margin-bottom: 2rem; padding-bottom: 1rem; border-bottom: 1px solid #eee; } + nav a { margin-right: 1rem; } + img { max-width: 100%; height: auto; border-radius: 6px; margin: 1rem 0; }`; + +async function exportHtml(root: string, outputDir: string): Promise { + const wikiDir = join(root, WIKI_DIR); + const files = await listWiki(root); + + await mkdir(outputDir, { recursive: true }); + const imageFiles = await copyImageAssets(root, outputDir); + + const articles: { title: string; htmlPath: string }[] = []; + + for (const filePath of files) { + const content = await readFile(filePath, "utf-8"); + const relPath = relative(wikiDir, filePath); + const { frontmatter, body } = parseFrontmatter(content); + const title = (frontmatter.title as string) ?? relPath.replace(/\.md$/, ""); + const htmlPath = relPath.replace(/\.md$/, ".html"); + + const depth = htmlPath.split("/").length - 1; + const prefix = depth > 0 ? "../".repeat(depth) : ""; + + const html = simpleMarkdownToHtml(body, prefix); + + const page = ` + + + + + ${escapeHtml(title)} + + + + +

${escapeHtml(title)}

+ ${html} + +`; + + const outPath = join(outputDir, htmlPath); + await mkdir(join(outPath, ".."), { recursive: true }); + await writeFile(outPath, page, "utf-8"); + + articles.push({ title, htmlPath }); + } + + const indexHtml = ` + + + + + Knowledge Base + + + +

Knowledge Base

+

${articles.length} articles

+
    + ${articles + .sort((a, b) => a.title.localeCompare(b.title)) + .map((a) => `
  • ${escapeHtml(a.title)}
  • `) + .join("\n ")} +
+ +`; + + await writeFile(join(outputDir, "index.html"), indexHtml, "utf-8"); + + return articles.length; +} + +async function copyImageAssets(root: string, outputDir: string): Promise { + const imageFiles = await listImageAssets(root); + if (imageFiles.length === 0) return []; + + const srcDir = join(root, WIKI_DIR, "images"); + const destDir = join(outputDir, "images"); + await mkdir(destDir, { recursive: true }); + + for (const filename of imageFiles) { + await copyFile(join(srcDir, filename), join(destDir, filename)); + } + + return imageFiles; +} + +function simpleMarkdownToHtml(md: string, imagePrefix = ""): string { + return md + .replace(/```(\w*)\n([\s\S]*?)```/g, "
$2
") + .replace(/^### (.+)$/gm, "

$1

") + .replace(/^## (.+)$/gm, "

$1

") + .replace(/^# (.+)$/gm, "

$1

") + .replace(/\*\*(.+?)\*\*/g, "$1") + .replace(/\*(.+?)\*/g, "$1") + .replace(/`([^`]+)`/g, "$1") + .replace( + /!\[([^\]]*)\]\((images\/[^)]+)\)/g, + (_, alt: string, src: string) => `${escapeHtml(alt)}`, + ) + .replace( + /!\[([^\]]*)\]\((https?:\/\/[^)]+)\)/g, + (_, alt: string, src: string) => `${escapeHtml(alt)}`, + ) + .replace(/\[\[([^\]]+)\]\]/g, '$1') + .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1') + .replace(/^- (.+)$/gm, "
  • $1
  • ") + .replace(/\n\n/g, "

    ") + .replace(/^/, "

    ") + .replace(/$/, "

    ") + .replace(/

  • /g, "
    • ") + .replace(/<\/li><\/p>/g, "
    "); +} + +function escapeHtml(str: string): string { + return str + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} diff --git a/packages/cli/src/mcp/server.test.ts b/packages/cli/src/mcp/server.test.ts index c3f68bd..bf20651 100644 --- a/packages/cli/src/mcp/server.test.ts +++ b/packages/cli/src/mcp/server.test.ts @@ -291,6 +291,267 @@ describe("MCP server", () => { }); }); + // ── kib_search advanced ─────────────────────────────────── + + describe("kib_search advanced", () => { + function taggedArticle(title: string, tags: string[], content: string): string { + return `---\ntitle: ${title}\nslug: ${title.toLowerCase().replace(/\s+/g, "-")}\ntags: [${tags.join(", ")}]\ndate: 2025-06-01\n---\n\n# ${title}\n\n${content}`; + } + + test("filters by tag", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/transformer.md", + taggedArticle("Transformer", ["nlp", "deep-learning"], "A neural network."), + ); + await writeWiki( + root, + "concepts/cnn.md", + taggedArticle("CNN", ["vision"], "A convolutional neural network."), + ); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_search", + arguments: { query: "neural network", tag: "nlp" }, + }); + expect(result.isError).toBeFalsy(); + + const hits = JSON.parse(textOf(result)); + expect(hits.length).toBe(1); + expect(hits[0].path).toContain("transformer.md"); + }); + + test("filters by since date", async () => { + const root = await makeTempVault(); + await writeWiki( + root, + "concepts/old.md", + `---\ntitle: Old\nslug: old\ndate: 2023-01-01\n---\n\n# Old\n\nNeural network.`, + ); + await writeWiki( + root, + "concepts/new.md", + `---\ntitle: New\nslug: new\ndate: 2025-06-01\n---\n\n# New\n\nNeural network.`, + ); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_search", + arguments: { query: "neural", since: "2025-01-01" }, + }); + expect(result.isError).toBeFalsy(); + + const hits = JSON.parse(textOf(result)); + expect(hits.length).toBe(1); + expect(hits[0].path).toContain("new.md"); + }); + + test("scopes search to wiki only", async () => { + const root = await makeTempVault(); + await writeWiki(root, "concepts/wiki.md", articleMd("Wiki Article", "Neural network.")); + await writeRaw(root, "articles/raw.md", "# Raw Article\n\nNeural network."); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_search", + arguments: { query: "neural", scope: "wiki" }, + }); + expect(result.isError).toBeFalsy(); + + const hits = JSON.parse(textOf(result)); + expect(hits.every((h: { path: string }) => h.path.includes("wiki/"))).toBe(true); + }); + }); + + // ── kib_config ───────────────────────────────────────────── + + describe("kib_config", () => { + test("lists all config", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ name: "kib_config", arguments: {} }); + expect(result.isError).toBeFalsy(); + + const data = JSON.parse(textOf(result)); + expect(data.provider).toBeDefined(); + expect(data.search).toBeDefined(); + }); + + test("reads a specific config key", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_config", + arguments: { key: "search.engine" }, + }); + expect(result.isError).toBeFalsy(); + + const data = JSON.parse(textOf(result)); + expect(data["search.engine"]).toBeDefined(); + }); + + test("sets a config value", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const setResult = await client.callTool({ + name: "kib_config", + arguments: { key: "search.engine", value: "hybrid" }, + }); + expect(setResult.isError).toBeFalsy(); + + const data = JSON.parse(textOf(setResult)); + expect(data["search.engine"]).toBe("hybrid"); + expect(data.saved).toBe(true); + + // Verify it persisted + const getResult = await client.callTool({ + name: "kib_config", + arguments: { key: "search.engine" }, + }); + const readBack = JSON.parse(textOf(getResult)); + expect(readBack["search.engine"]).toBe("hybrid"); + }); + + test("returns error for unknown key", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_config", + arguments: { key: "nonexistent.key" }, + }); + expect(result.isError).toBe(true); + }); + }); + + // ── kib_skill ────────────────────────────────────────────── + + describe("kib_skill", () => { + test("lists built-in skills", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_skill", + arguments: { action: "list" }, + }); + expect(result.isError).toBeFalsy(); + + const skills = JSON.parse(textOf(result)); + expect(Array.isArray(skills)).toBe(true); + expect(skills.length).toBeGreaterThan(0); + expect(skills[0].name).toBeDefined(); + expect(skills[0].description).toBeDefined(); + }); + + test("returns error when running without name", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_skill", + arguments: { action: "run" }, + }); + expect(result.isError).toBe(true); + expect(textOf(result)).toContain("name is required"); + }); + + test("returns error for nonexistent skill", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_skill", + arguments: { action: "run", name: "nonexistent-skill" }, + }); + expect(result.isError).toBe(true); + expect(textOf(result)).toContain("not found"); + }); + }); + + // ── kib_export ───────────────────────────────────────────── + + describe("kib_export", () => { + test("exports wiki as markdown", async () => { + const root = await makeTempVault(); + await writeWiki(root, "concepts/test.md", articleMd("Test", "Test content.")); + const client = await createClient(root); + + const outputDir = join(tempDir, "export-test"); + const result = await client.callTool({ + name: "kib_export", + arguments: { format: "markdown", output: outputDir }, + }); + expect(result.isError).toBeFalsy(); + + const data = JSON.parse(textOf(result)); + expect(data.format).toBe("markdown"); + expect(data.files).toBeGreaterThan(0); + expect(data.output).toBe(outputDir); + }); + + test("exports wiki as html", async () => { + const root = await makeTempVault(); + await writeWiki(root, "concepts/test.md", articleMd("Test", "Test content.")); + const client = await createClient(root); + + const outputDir = join(tempDir, "export-html-test"); + const result = await client.callTool({ + name: "kib_export", + arguments: { format: "html", output: outputDir }, + }); + expect(result.isError).toBeFalsy(); + + const data = JSON.parse(textOf(result)); + expect(data.format).toBe("html"); + expect(data.files).toBeGreaterThan(0); + }); + }); + + // ── kib_compile with max ────────────────────────────────── + + describe("kib_compile params", () => { + test("accepts dry_run and max params", async () => { + const root = await makeTempVault(); + const client = await createClient(root); + + // dry_run compile on empty vault should succeed + const result = await client.callTool({ + name: "kib_compile", + arguments: { dry_run: true, max: 5 }, + }); + // Will error due to no provider, which is expected + // The point is it doesn't crash on the new params + expect(result).toBeDefined(); + }); + }); + + // ── kib_ingest dry_run ──────────────────────────────────── + + describe("kib_ingest dry_run", () => { + test("dry run returns preview without writing", async () => { + const root = await makeTempVault(); + const filePath = join(tempDir, "dry-run-input.txt"); + await writeFile(filePath, "Test document for dry run verification."); + const client = await createClient(root); + + const result = await client.callTool({ + name: "kib_ingest", + arguments: { source: filePath, dry_run: true }, + }); + expect(result.isError).toBeFalsy(); + + const data = JSON.parse(textOf(result)); + expect(data.dryRun).toBe(true); + expect(data.path).toBeTruthy(); + }); + }); + // ── Tool listing ─────────────────────────────────────────── describe("tool listing", () => { @@ -302,12 +563,15 @@ describe("MCP server", () => { const names = tools.map((t) => t.name).sort(); expect(names).toEqual([ "kib_compile", + "kib_config", + "kib_export", "kib_ingest", "kib_lint", "kib_list", "kib_query", "kib_read", "kib_search", + "kib_skill", "kib_status", ]); }); diff --git a/packages/cli/src/mcp/server.ts b/packages/cli/src/mcp/server.ts index b27387d..dabd0fe 100644 --- a/packages/cli/src/mcp/server.ts +++ b/packages/cli/src/mcp/server.ts @@ -1,6 +1,7 @@ import { compileVault, createProvider, + fixLintIssues, ingestSource, type LLMProvider, lintVault, @@ -15,6 +16,7 @@ import { readRaw, readWiki, SearchIndex, + saveConfig, type VaultConfig, } from "@kibhq/core"; import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; @@ -147,7 +149,7 @@ export function createMcpServer(root: string) { }, ); - // ── kib_read ────────────────────────────────────────────── + // ── kib_read ─────────────────────────────��──────────────── server.tool( "kib_read", @@ -186,10 +188,18 @@ export function createMcpServer(root: string) { .string() .optional() .describe("Filter to articles dated on or after this date (YYYY-MM-DD)"), + scope: z + .enum(["wiki", "raw", "all"]) + .default("all") + .describe("Search scope: wiki articles, raw sources, or all"), }, - async ({ query, limit, tag, since }) => { + async ({ query, limit, tag, since, scope }) => { try { - const index = await ctx.getSearchIndex(); + // Rebuild index with requested scope if not 'all' + const index = scope === "all" ? await ctx.getSearchIndex() : new SearchIndex(); + if (scope !== "all") { + await index.build(root, scope); + } const results = index.search(query, { limit, tag, since }); const prefix = `${root}/`; return json( @@ -252,14 +262,29 @@ export function createMcpServer(root: string) { .optional() .describe("Raw subdirectory override (e.g. 'papers', 'articles')"), tags: z.string().optional().describe("Comma-separated tags"), + dry_run: z + .boolean() + .default(false) + .describe("Preview what would be ingested without writing"), }, - async ({ source, category, tags }) => { + async ({ source, category, tags, dry_run }) => { try { const result = await ingestSource(root, source, { category, tags: tags?.split(",").map((t) => t.trim()), + dryRun: dry_run, }); + if (dry_run) { + return json({ + dryRun: true, + path: result.path, + title: result.title, + wordCount: result.wordCount, + skipped: result.skipped, + }); + } + // Auto-compile after ingest so content is immediately queryable let compiled = null; if (!result.skipped) { @@ -317,8 +342,14 @@ export function createMcpServer(root: string) { force: z.boolean().default(false).describe("Recompile all sources"), source: z.string().optional().describe("Compile only a specific source"), dry_run: z.boolean().default(false).describe("Preview without writing"), + max: z + .number() + .int() + .positive() + .optional() + .describe("Limit number of sources to compile per pass"), }, - async ({ force, source, dry_run }) => { + async ({ force, source, dry_run, max }) => { try { const provider = await ctx.getProvider(); const config = await ctx.getConfig(); @@ -326,6 +357,7 @@ export function createMcpServer(root: string) { force, dryRun: dry_run, sourceFilter: source, + maxSources: max, }); ctx.invalidateSearch(); return json({ @@ -345,7 +377,7 @@ export function createMcpServer(root: string) { server.tool( "kib_lint", - "Run health checks on the wiki and report issues", + "Run health checks on the wiki and report issues. Use fix=true to auto-fix fixable issues (recompile stale sources, create missing articles).", { rule: z .string() @@ -353,11 +385,44 @@ export function createMcpServer(root: string) { .describe( "Run only a specific rule: orphan, stale, missing, broken-link, frontmatter, contradiction", ), + fix: z + .boolean() + .default(false) + .describe("Auto-fix fixable issues (recompile stale, create missing articles)"), }, - async ({ rule }) => { + async ({ rule, fix }) => { try { const provider = await ctx.getProvider().catch(() => undefined); const result = await lintVault(root, { ruleFilter: rule, provider }); + + if (fix) { + const fixable = result.diagnostics.filter((d) => d.fixable); + if (fixable.length > 0) { + let fixProvider: LLMProvider | undefined; + let config: VaultConfig | undefined; + const hasStale = fixable.some((d) => d.rule === "stale"); + if (hasStale) { + try { + config = await ctx.getConfig(); + fixProvider = await ctx.getProvider(); + } catch { + // Provider not available — stale fixes will be skipped + } + } + const fixResult = await fixLintIssues(root, result.diagnostics, fixProvider, config); + ctx.invalidateSearch(); + return json({ + diagnostics: result.diagnostics, + errors: result.errors, + warnings: result.warnings, + infos: result.infos, + fixed: fixResult.fixed, + fixSkipped: fixResult.skipped, + fixErrors: fixResult.errors, + }); + } + } + return json({ errors: result.errors, warnings: result.warnings, @@ -370,6 +435,114 @@ export function createMcpServer(root: string) { }, ); + // ── kib_config ──────────────────────────────────────────── + + server.tool( + "kib_config", + "Get or set vault configuration. Call with no arguments to list all config. Pass key to read a value, pass key+value to set it.", + { + key: z + .string() + .optional() + .describe( + "Dot-separated config key (e.g. 'provider.default', 'provider.model', 'search.engine')", + ), + value: z.string().optional().describe("Value to set. Omit to read the current value."), + }, + async ({ key, value }) => { + try { + const config = await loadConfig(root); + + // List all config + if (!key) { + return json(config); + } + + // Get a value + if (!value) { + const val = getNestedValue(config, key); + if (val === undefined) return err(`Unknown config key: ${key}`); + return json({ [key]: val }); + } + + // Set a value + const parsed = parseConfigValue(value); + const updated = setNestedValue(config, key, parsed); + if (!updated) return err(`Unknown config key: ${key}`); + await saveConfig(root, config); + return json({ [key]: parsed, saved: true }); + } catch (e) { + return err((e as Error).message); + } + }, + ); + + // ── kib_skill ─────────────────────────��─────────────────── + + server.tool( + "kib_skill", + "List or run vault skills. Skills are reusable LLM-powered operations (summarize, flashcards, connections, etc).", + { + action: z + .enum(["list", "run"]) + .describe("'list' to see available skills, 'run' to execute one"), + name: z.string().optional().describe("Skill name to run (required when action is 'run')"), + }, + async ({ action, name }) => { + try { + const { loadSkills, findSkill, runSkill } = await import("@kibhq/core"); + + if (action === "list") { + const skills = await loadSkills(root); + return json(skills.map((s) => ({ name: s.name, description: s.description }))); + } + + // action === "run" + if (!name) return err("Skill name is required. Use action='list' to see available skills."); + + const skill = await findSkill(root, name); + if (!skill) + return err(`Skill "${name}" not found. Use action='list' to see available skills.`); + + let provider: LLMProvider | undefined; + if (skill.llm?.required) { + const config = await ctx.getConfig(); + const modelKey = skill.llm.model === "fast" ? "fast_model" : "model"; + const model = config.provider[modelKey as keyof typeof config.provider] as string; + provider = await createProvider(config.provider.default, model); + } + + const result = await runSkill(root, skill, { provider }); + return json({ skill: skill.name, content: result.content ?? null }); + } catch (e) { + return err((e as Error).message); + } + }, + ); + + // ── kib_export ──────────────────────────────────────────── + + server.tool( + "kib_export", + "Export the wiki as a clean markdown bundle or static HTML site. Returns the output directory path and file count.", + { + format: z + .enum(["markdown", "html"]) + .default("markdown") + .describe("Export format: 'markdown' (clean, no frontmatter) or 'html' (static site)"), + output: z.string().optional().describe("Output directory path. Defaults to /export"), + }, + async ({ format, output }) => { + try { + const { exportVault } = await import("./export-helper.js"); + const result = await exportVault(root, format, output); + return json(result); + } catch (e) { + return err((e as Error).message); + } + }, + ); + // ── Resources ───────────────────────────────────────────── server.resource("wiki-index", "wiki://index", { mimeType: "text/markdown" }, async () => { @@ -397,6 +570,45 @@ export function createMcpServer(root: string) { return server; } +// ─── Config Helpers ───────────────────────────────────────────── + +function getNestedValue(obj: Record, path: string): unknown { + const parts = path.split("."); + let current: unknown = obj; + for (const part of parts) { + if (current == null || typeof current !== "object") return undefined; + current = (current as Record)[part]; + } + return current; +} + +function setNestedValue(obj: Record, path: string, value: unknown): boolean { + const parts = path.split("."); + let current: unknown = obj; + for (let i = 0; i < parts.length - 1; i++) { + if (current == null || typeof current !== "object") return false; + current = (current as Record)[parts[i]!]; + } + const lastKey = parts[parts.length - 1]!; + if ( + current == null || + typeof current !== "object" || + !(lastKey in (current as Record)) + ) { + return false; + } + (current as Record)[lastKey] = value; + return true; +} + +function parseConfigValue(val: string): unknown { + if (val === "true") return true; + if (val === "false") return false; + const num = Number(val); + if (!Number.isNaN(num) && val.trim() !== "") return num; + return val; +} + export async function startMcpServer(root: string) { const server = createMcpServer(root); const transport = new StdioServerTransport();