Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,18 @@ What's built, what's next, and what's deferred.
## v0.7.0 — Advanced Search

### Vector/Semantic Search
- [ ] Optional embedding-based search alongside BM25
- [ ] Embedding provider: OpenAI `text-embedding-3-small`, or local via Ollama
- [ ] Hybrid scoring: combine BM25 + cosine similarity
- [ ] Store embeddings in `.kb/cache/embeddings.bin`
- [ ] Rebuild embeddings on compile
- [x] Optional embedding-based search alongside BM25
- [x] Embedding provider: OpenAI `text-embedding-3-small`, or local via Ollama
- [x] Hybrid scoring: combine BM25 + cosine similarity (Reciprocal Rank Fusion)
- [x] Store embeddings in `.kb/cache/vectors.idx` (binary Float32Array format)
- [x] Rebuild embeddings on compile

### Search Improvements
- [ ] Fuzzy matching for typo tolerance
- [ ] Phrase search with quotes: `kib search '"attention mechanism"'`
- [ ] Tag-based filtering: `kib search "transformers" --tag deep-learning`
- [ ] Date range filtering: `kib search --since 2024-01-01`
- [ ] Search result highlighting in terminal (bold matched terms)
- [x] Fuzzy matching for typo tolerance (edit distance ≤ 1 for tokens ≥ 4 chars)
- [x] Phrase search with quotes: `kib search '"attention mechanism"'`
- [x] Tag-based filtering: `kib search "transformers" --tag deep-learning`
- [x] Date range filtering: `kib search --since 2024-01-01`
- [x] Search result highlighting in terminal (bold matched terms)

---

Expand Down
1 change: 1 addition & 0 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 20 additions & 4 deletions packages/cli/src/commands/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ interface SearchOpts {
limit?: number;
json?: boolean;
engine?: "builtin" | "vector" | "hybrid";
tag?: string[];
since?: string;
}

export async function search(term: string, opts: SearchOpts) {
Expand All @@ -30,6 +32,8 @@ export async function search(term: string, opts: SearchOpts) {

const scope = opts.wiki ? "wiki" : opts.raw ? "raw" : "all";
const limit = opts.limit ?? 20;
const tags = opts.tag ?? undefined;
const since = opts.since ?? undefined;

// Determine search engine
let engine = opts.engine;
Expand All @@ -44,13 +48,17 @@ export async function search(term: string, opts: SearchOpts) {

debug(`vault root: ${root}`);
debug(`scope: ${scope}, limit: ${limit}, engine: ${engine}, term: "${term}"`);
if (tags) debug(`tag filter: ${tags.join(", ")}`);
if (since) debug(`since filter: ${since}`);

const spinner = createSpinner("Searching...");
spinner.start();

let results: SearchResult[];
let elapsed: number;

const searchOpts = { limit, tag: tags, since, highlight: !opts.json };

if (engine === "hybrid" || engine === "vector") {
const endIndex = debugTime("load/build hybrid index");
const bm25 = new SearchIndex();
Expand Down Expand Up @@ -83,6 +91,15 @@ export async function search(term: string, opts: SearchOpts) {
const start = performance.now();
results = await hybrid.search(term, provider, { limit });
elapsed = Math.round(performance.now() - start);

// Apply tag/date filters and highlighting post-hoc for hybrid
// (BM25 side supports it natively, but hybrid fuses results)
if (tags || since) {
// Re-run BM25 with filters to get filtered results
const filteredBm25 = bm25.search(term, searchOpts);
const filteredPaths = new Set(filteredBm25.map((r) => r.path));
results = results.filter((r) => filteredPaths.has(r.path));
}
} else {
endIndex();
// Fallback path
Expand All @@ -94,7 +111,7 @@ export async function search(term: string, opts: SearchOpts) {
await index.save(root);
}
const start = performance.now();
results = index.search(term, { limit });
results = index.search(term, searchOpts);
elapsed = Math.round(performance.now() - start);
}
} else {
Expand All @@ -112,7 +129,7 @@ export async function search(term: string, opts: SearchOpts) {
endIndex();

const start = performance.now();
results = index.search(term, { limit });
results = index.search(term, searchOpts);
elapsed = Math.round(performance.now() - start);
}

Expand Down Expand Up @@ -143,14 +160,13 @@ export async function search(term: string, opts: SearchOpts) {
console.log(` ${num}. ${title} ${score}`);
console.log(` ${dimPath(r.path)}`);
if (r.snippet) {
console.log(` ${truncate(r.snippet, 80)}`);
console.log(` ${truncate(r.snippet, 120)}`);
}
console.log();
}
}

/**
 * Wrap a path in the ANSI "faint" SGR sequence (code 2) followed by a reset
 * (code 0) so terminals render it dimmed. No colour library is involved.
 */
function dimPath(path: string): string {
  const dimOn = "\x1b[2m";
  const reset = "\x1b[0m";
  return dimOn + path + reset;
}

Expand Down
7 changes: 7 additions & 0 deletions packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ program
.option("--limit <n>", "max results", Number.parseInt)
.option("--json", "JSON output")
.option("--engine <type>", "search engine: builtin, vector, hybrid")
.option(
"--tag <tag>",
"filter by tag (repeatable)",
(val: string, prev: string[]) => [...prev, val],
[] as string[],
)
.option("--since <date>", "filter to articles dated on or after (YYYY-MM-DD)")
.action(async (term, opts) => {
const { search } = await import("./commands/search.js");
await search(term, opts);
Expand Down
178 changes: 178 additions & 0 deletions packages/cli/src/mcp/export-helper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises";
import { join, relative } from "node:path";
import { listImageAssets, listWiki, parseFrontmatter, WIKI_DIR } from "@kibhq/core";

/**
 * Export the vault's wiki in the requested format.
 *
 * @param root - Vault root directory.
 * @param format - `"markdown"` or `"html"`.
 * @param output - Destination directory; defaults to `<root>/export`.
 * @returns The format used, the directory written to, and the number of
 *   articles exported.
 * @throws Error if `format` is neither `"markdown"` nor `"html"` (reachable
 *   only when a caller bypasses the type checker).
 */
export async function exportVault(
  root: string,
  format: "markdown" | "html",
  output?: string,
): Promise<{ format: string; output: string; files: number }> {
  const outputDir = output ?? join(root, "export");

  let fileCount: number;
  if (format === "markdown") {
    fileCount = await exportMarkdown(root, outputDir);
  } else if (format === "html") {
    fileCount = await exportHtml(root, outputDir);
  } else {
    throw new Error(`Unsupported format: ${format}. Use 'markdown' or 'html'.`);
  }

  return { format, output: outputDir, files: fileCount };
}

/**
 * Write every wiki article to `outputDir` as plain Markdown, mirroring the
 * wiki's directory layout. Image assets are copied first.
 *
 * Each article has its leading `---`-delimited block stripped and every
 * `[[slug]]` wikilink rewritten to a regular `[slug](slug.md)` link.
 *
 * @returns The number of articles written.
 */
async function exportMarkdown(root: string, outputDir: string): Promise<number> {
  const wikiRoot = join(root, WIKI_DIR);
  const sources = await listWiki(root);

  await copyImageAssets(root, outputDir);

  for (const source of sources) {
    const raw = await readFile(source, "utf-8");
    const target = join(outputDir, relative(wikiRoot, source));

    // Ensure the article's parent directory exists before writing into it.
    await mkdir(join(target, ".."), { recursive: true });

    const portable = raw
      .replace(/^---[\s\S]*?---\s*\n/, "")
      .replace(/\[\[([^\]]+)\]\]/g, (_match, slug: string) => `[${slug}](${slug}.md)`);

    await writeFile(target, portable, "utf-8");
  }

  return sources.length;
}

// Stylesheet inlined into the <style> element of every exported HTML page
// (article pages and the index), so the export needs no external CSS file.
const SHARED_CSS = `
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; max-width: 720px; margin: 2rem auto; padding: 0 1rem; line-height: 1.6; color: #1a1a1a; }
a { color: #0066cc; }
code { background: #f4f4f4; padding: 0.2em 0.4em; border-radius: 3px; font-size: 0.9em; }
pre { background: #f4f4f4; padding: 1rem; border-radius: 6px; overflow-x: auto; }
pre code { background: none; padding: 0; }
nav { margin-bottom: 2rem; padding-bottom: 1rem; border-bottom: 1px solid #eee; }
nav a { margin-right: 1rem; }
img { max-width: 100%; height: auto; border-radius: 6px; margin: 1rem 0; }`;

/**
 * Render every wiki article to a standalone HTML page under `outputDir`,
 * together with an `index.html` listing all articles and, when the vault has
 * image assets, a `gallery.html` showing them.
 *
 * Fixes over the previous version:
 * - Article pages linked to `gallery.html` but the page was never written.
 * - Nesting depth was computed by splitting on `/`, which is wrong where
 *   `path.relative` returns `\`-separated paths (Windows).
 * - A non-string frontmatter title no longer reaches `escapeHtml`.
 *
 * @param root - Vault root directory.
 * @param outputDir - Directory to write the HTML export into (created if missing).
 * @returns The number of article pages written (index and gallery not counted).
 */
async function exportHtml(root: string, outputDir: string): Promise<number> {
  const wikiDir = join(root, WIKI_DIR);
  const files = await listWiki(root);

  await mkdir(outputDir, { recursive: true });
  const imageFiles = await copyImageAssets(root, outputDir);
  const hasGallery = imageFiles.length > 0;

  const articles: { title: string; href: string }[] = [];

  for (const filePath of files) {
    const content = await readFile(filePath, "utf-8");
    const relPath = relative(wikiDir, filePath);
    const { frontmatter, body } = parseFrontmatter(content);

    // NOTE(review): assumes parseFrontmatter may surface YAML scalars as-is
    // (e.g. `title: 1984` as a number) — confirm against its implementation.
    const rawTitle: unknown = frontmatter.title;
    const title =
      typeof rawTitle === "string"
        ? rawTitle
        : typeof rawTitle === "number"
          ? String(rawTitle)
          : relPath.replace(/\.md$/, "");

    // `htmlPath` keeps the platform separator for file-system use; `href` is
    // the same path with forward slashes for use in URLs and depth counting.
    const htmlPath = relPath.replace(/\.md$/, ".html");
    const href = htmlPath.split(/[\\/]/).join("/");
    const depth = href.split("/").length - 1;
    const prefix = "../".repeat(depth);

    const html = simpleMarkdownToHtml(body, prefix);

    const page = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${escapeHtml(title)}</title>
<style>${SHARED_CSS}</style>
</head>
<body>
<nav><a href="${prefix}index.html">Index</a>${hasGallery ? ` <a href="${prefix}gallery.html">Gallery</a>` : ""}</nav>
<h1>${escapeHtml(title)}</h1>
${html}
</body>
</html>`;

    const outPath = join(outputDir, htmlPath);
    await mkdir(join(outPath, ".."), { recursive: true });
    await writeFile(outPath, page, "utf-8");

    articles.push({ title, href });
  }

  const indexHtml = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Knowledge Base</title>
<style>${SHARED_CSS}</style>
</head>
<body>
<h1>Knowledge Base</h1>
<p>${articles.length} articles</p>
<ul>
${articles
  .sort((a, b) => a.title.localeCompare(b.title))
  .map((a) => `<li><a href="${escapeHtml(a.href)}">${escapeHtml(a.title)}</a></li>`)
  .join("\n ")}
</ul>
</body>
</html>`;

  await writeFile(join(outputDir, "index.html"), indexHtml, "utf-8");

  // Article pages link to gallery.html whenever images exist, so the page
  // must actually be written or that navigation link is dead.
  if (hasGallery) {
    const figures = imageFiles
      .map((name) => {
        // Encode each path segment so characters such as spaces or `#` in a
        // file name cannot break the URL.
        const src = `images/${name.split(/[\\/]/).map(encodeURIComponent).join("/")}`;
        const label = escapeHtml(name);
        return `<figure><img src="${escapeHtml(src)}" alt="${label}"><figcaption>${label}</figcaption></figure>`;
      })
      .join("\n");

    const galleryHtml = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gallery</title>
<style>${SHARED_CSS}</style>
</head>
<body>
<nav><a href="index.html">Index</a></nav>
<h1>Gallery</h1>
<p>${imageFiles.length} images</p>
${figures}
</body>
</html>`;

    await writeFile(join(outputDir, "gallery.html"), galleryHtml, "utf-8");
  }

  return articles.length;
}

/**
 * Copy the wiki's image assets from `<root>/<WIKI_DIR>/images` into
 * `<outputDir>/images`, creating the destination directory on demand.
 *
 * @returns The asset file names reported by `listImageAssets`; an empty array
 *   (and no directory created) when there are none.
 */
async function copyImageAssets(root: string, outputDir: string): Promise<string[]> {
  const assets = await listImageAssets(root);

  if (assets.length > 0) {
    const from = join(root, WIKI_DIR, "images");
    const to = join(outputDir, "images");
    await mkdir(to, { recursive: true });

    for (const name of assets) {
      await copyFile(join(from, name), join(to, name));
    }
  }

  return assets;
}

/**
 * Convert a small Markdown subset to HTML using ordered regex passes.
 *
 * Supported: fenced code blocks, inline code, `#`–`###` headings, bold,
 * italic, images (`images/…` and `http(s)://…`), `[[wikilinks]]`, regular
 * links, `- ` list items, and blank-line paragraph breaks.
 *
 * Code is rendered first and swapped for NUL-delimited placeholders so that
 * none of the later passes (headings, emphasis, links, paragraph splitting)
 * can rewrite its contents; the rendered code is restored at the end. Code
 * content and URL attribute values are HTML-escaped.
 *
 * @param md - Markdown body (frontmatter already removed).
 * @param imagePrefix - Relative prefix (e.g. `"../"`) prepended to local
 *   `images/…` sources so they resolve from nested pages.
 * @returns An HTML fragment.
 */
function simpleMarkdownToHtml(md: string, imagePrefix = ""): string {
  const parked: string[] = [];
  // Store rendered HTML and hand back a token no later regex will match.
  const park = (html: string): string => `\u0000${parked.push(html) - 1}\u0000`;
  // A function replacer is used so `$` sequences in code are not interpreted.
  const unpark = (text: string): string =>
    text.replace(/\u0000(\d+)\u0000/g, (token: string, slot: string) => parked[Number(slot)] ?? token);

  const converted = md
    // NUL is reserved for placeholders (and is not valid in HTML anyway).
    .replace(/\u0000/g, "")
    .replace(/```(\w*)\n([\s\S]*?)```/g, (_m: string, _lang: string, code: string) =>
      park(`<pre><code>${escapeHtml(code)}</code></pre>`),
    )
    .replace(/`([^`]+)`/g, (_m: string, code: string) => park(`<code>${escapeHtml(code)}</code>`))
    .replace(/^### (.+)$/gm, "<h3>$1</h3>")
    .replace(/^## (.+)$/gm, "<h2>$1</h2>")
    .replace(/^# (.+)$/gm, "<h1>$1</h1>")
    .replace(/\*\*(.+?)\*\*/g, "<strong>$1</strong>")
    .replace(/\*(.+?)\*/g, "<em>$1</em>")
    .replace(
      /!\[([^\]]*)\]\((images\/[^)]+)\)/g,
      (_m: string, alt: string, src: string) =>
        `<img src="${escapeHtml(imagePrefix + src)}" alt="${escapeHtml(alt)}">`,
    )
    .replace(
      /!\[([^\]]*)\]\((https?:\/\/[^)]+)\)/g,
      (_m: string, alt: string, src: string) => `<img src="${escapeHtml(src)}" alt="${escapeHtml(alt)}">`,
    )
    .replace(
      /\[\[([^\]]+)\]\]/g,
      (_m: string, slug: string) => `<a href="${escapeHtml(slug)}.html">${slug}</a>`,
    )
    .replace(
      /\[([^\]]+)\]\(([^)]+)\)/g,
      (_m: string, text: string, href: string) => `<a href="${escapeHtml(href)}">${text}</a>`,
    )
    .replace(/^- (.+)$/gm, "<li>$1</li>")
    .replace(/\n\n/g, "</p><p>")
    .replace(/^/, "<p>")
    .replace(/$/, "</p>")
    .replace(/<p><li>/g, "<ul><li>")
    .replace(/<\/li><\/p>/g, "</li></ul>");

  // Two passes: an inline-code slot can itself contain a fenced-block token.
  return unpark(unpark(converted));
}

/**
 * Escape the characters that are significant in HTML text and in
 * double-quoted attribute values: `&`, `<`, `>` and `"`.
 */
function escapeHtml(str: string): string {
  return str
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
Loading
Loading