Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,10 @@ kib should silently learn from everything you read without you thinking about it

### Instant Value Without Compile
Most of kib's value is locked behind `kib compile`. That's wrong — value should be immediate on ingest.
- [ ] Search + query over raw sources directly (no compile required)
- [ ] Compile becomes an optional enrichment step, not a prerequisite
- [ ] Incremental indexing: search index updates on ingest, not compile
- [ ] "Ask about this source" — query a single raw source without compiling the whole vault
- [x] Search + query over raw sources directly (no compile required)
- [x] Compile becomes an optional enrichment step, not a prerequisite
- [x] Incremental indexing: search index updates on ingest, not compile
- [x] "Ask about this source" — query a single raw source without compiling the whole vault

### Beyond CLI
CLI-only means developer-only. The knowledge is valuable to everyone.
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/commands/ingest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ export async function ingest(sources: string[], opts: IngestOpts) {
log.dim("(dry run — no files were written)");
} else {
log.blank();
log.dim("run kib compile to update the wiki");
log.dim("search and query ready — run kib compile to enrich into wiki articles");
}
} else if (skipped > 0) {
log.dim(`All ${skipped} source${skipped === 1 ? "" : "s"} already ingested`);
Expand Down
13 changes: 11 additions & 2 deletions packages/cli/src/commands/query.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { resolve } from "node:path";
import type { LLMProvider } from "@kibhq/core";
import {
createProvider,
Expand All @@ -14,6 +15,7 @@ import { createSpinner } from "../ui/spinner.js";
/**
 * Options object received by the `query` command handler.
 * Field meanings below are taken from the CLI option descriptions registered
 * for the command; NOTE(review): confirm the flag-to-field mapping against the
 * command definition if it changes.
 */
interface QueryOpts {
// `--file` / `--no-file`: auto-file the answer to wiki/outputs/, or never file.
file?: boolean;
// `--sources`: show which articles were used.
sources?: boolean;
// `--source <path>`: query one specific source file directly; the command
// resolves this path to an absolute path before passing it on.
source?: string;
// `--json`: JSON output.
json?: boolean;
}

Expand Down Expand Up @@ -47,13 +49,19 @@ export async function query(question: string, opts: QueryOpts) {

const { queryVault } = await import("@kibhq/core");

// Resolve --source path to absolute
const sourcePath = opts.source ? resolve(opts.source) : undefined;

debug(`vault root: ${root}`);
debug(`provider: ${config.provider.default}, model: ${config.provider.model}`);
debug(`question: "${question}"`);
if (sourcePath) debug(`source: ${sourcePath}`);

log.header("querying knowledge base");
log.header(sourcePath ? "querying source" : "querying knowledge base");

const spinner = createSpinner("Searching and generating answer...");
const spinner = createSpinner(
sourcePath ? "Reading source and generating answer..." : "Searching and generating answer...",
);
spinner.start();
const endQuery = debugTime("queryVault");

Expand All @@ -62,6 +70,7 @@ export async function query(question: string, opts: QueryOpts) {
const result = await queryVault(root, question, provider, {
autoFile,
autoFileThreshold: config.query.auto_file_threshold,
source: sourcePath,
});
endQuery();
debug(`sources used: ${result.sourcePaths.length}`);
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ program
.option("--file", "auto-file to wiki/outputs/")
.option("--no-file", "never file")
.option("--sources", "show which articles were used")
.option("--source <path>", "query a specific source file directly")
.option("--json", "JSON output")
.action(async (question, opts) => {
const { query } = await import("./commands/query.js");
Expand Down
21 changes: 16 additions & 5 deletions packages/cli/src/mcp/server.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { join } from "node:path";
import {
compileVault,
createProvider,
Expand Down Expand Up @@ -247,7 +248,7 @@ export function createMcpServer(root: string) {

server.tool(
"kib_query",
"Ask a question against the knowledge base using RAG (retrieval-augmented generation). Requires a configured LLM provider.",
"Ask a question against the knowledge base using RAG (retrieval-augmented generation). Searches both raw sources and compiled wiki articles, so it works immediately after ingest — no compile needed. Requires a configured LLM provider.",
{
question: z.string().describe("Question to ask"),
max_articles: z
Expand All @@ -256,16 +257,24 @@ export function createMcpServer(root: string) {
.positive()
.max(10)
.default(5)
.describe("Max articles to use as context"),
.describe("Max sources/articles to use as context"),
source: z
.string()
.optional()
.describe(
"Path to a specific source to query against (e.g. 'raw/articles/my-source.md'). Skips search and uses only this source as context.",
),
},
async ({ question, max_articles }) => {
async ({ question, max_articles, source }) => {
try {
const provider = await ctx.getProvider();
const config = await ctx.getConfig();
const sourcePath = source ? join(root, source) : undefined;
const result = await queryVault(root, question, provider, {
maxArticles: max_articles,
autoFile: config.query.auto_file,
autoFileThreshold: config.query.auto_file_threshold,
source: sourcePath,
});
const filed = result.filedTo ? `\nFiled to: ${result.filedTo}` : "";
return ok(
Expand All @@ -281,7 +290,7 @@ export function createMcpServer(root: string) {

server.tool(
"kib_ingest",
"Ingest a source (URL or file path) into the knowledge base. No API key needed for ingestion. Auto-compiles after ingest if an LLM provider is configured; otherwise sources are saved but not compiled.",
"Ingest a source (URL or file path) into the knowledge base. No API key needed for ingestion. Sources are immediately searchable and queryable after ingest. Auto-compiles into wiki articles if an LLM provider is configured.",
{
source: z.string().describe("URL or file path to ingest"),
category: z
Expand Down Expand Up @@ -332,9 +341,10 @@ export function createMcpServer(root: string) {
title: result.title,
wordCount: result.wordCount,
skipped: result.skipped,
searchable: true,
compiled: null,
compileError: isProviderErr
? "No LLM provider configured. The source was saved but not compiled. Tell the user to set ANTHROPIC_API_KEY, OPENAI_API_KEY, or start Ollama, then run `kib compile`."
? "No LLM provider configured. Source is searchable and queryable immediately. To compile into wiki articles, set ANTHROPIC_API_KEY, OPENAI_API_KEY, or start Ollama, then run `kib compile`."
: msg,
});
}
Expand All @@ -347,6 +357,7 @@ export function createMcpServer(root: string) {
wordCount: result.wordCount,
skipped: result.skipped,
skipReason: result.skipReason,
searchable: !result.skipped,
compiled: compiled
? {
articlesCreated: compiled.articlesCreated,
Expand Down
129 changes: 129 additions & 0 deletions packages/core/src/ingest/ingest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { existsSync } from "node:fs";
import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { SearchIndex } from "../search/engine.js";
import { VectorIndex } from "../search/vector.js";
import type { LLMProvider } from "../types.js";
import { initVault, listImageAssets, loadManifest } from "../vault.js";
import { ingestSource } from "./ingest.js";
Expand Down Expand Up @@ -228,6 +230,133 @@ describe("ingestSource", () => {
expect(rawContent).toContain("word_count:");
});

// A freshly ingested file must be findable through the persisted search index
// without any further step in between.
test("ingest updates search index so source is immediately searchable", async () => {
  const vaultRoot = await makeTempVault();

  const sourceFile = join(vaultRoot, "quantum-computing.md");
  const markdown =
    "# Quantum Computing\n\nQuantum computers use qubits and superposition to solve problems.";
  await writeFile(sourceFile, markdown);

  await ingestSource(vaultRoot, sourceFile);

  // Read the index back through a brand-new instance so the assertions cover
  // what was persisted for the vault, not in-memory state.
  const persistedIndex = new SearchIndex();
  const didLoad = await persistedIndex.load(vaultRoot);
  expect(didLoad).toBe(true);
  expect(persistedIndex.documentCount).toBeGreaterThan(0);

  const hits = persistedIndex.search("quantum computing");
  expect(hits.length).toBeGreaterThan(0);
  expect(hits[0]?.title).toBe("Quantum Computing");
});

// Two consecutive ingests must both end up in the persisted search index:
// the second ingest adds to the index rather than replacing it.
test("multiple ingests build up the search index incrementally", async () => {
  const vaultRoot = await makeTempVault();

  const fixtures = [
    {
      file: join(vaultRoot, "first.md"),
      body: "# Machine Learning\n\nML uses statistical models to learn from data.",
    },
    {
      file: join(vaultRoot, "second.md"),
      body: "# Deep Learning\n\nDeep learning uses neural networks with many layers.",
    },
  ];

  // Write every fixture first, then ingest them one after another, in order.
  for (const { file, body } of fixtures) {
    await writeFile(file, body);
  }
  for (const { file } of fixtures) {
    await ingestSource(vaultRoot, file);
  }

  const persistedIndex = new SearchIndex();
  const didLoad = await persistedIndex.load(vaultRoot);
  expect(didLoad).toBe(true);
  expect(persistedIndex.documentCount).toBe(2);

  // Each document has to be reachable through its own query.
  const machineLearningHits = persistedIndex.search("machine learning");
  expect(machineLearningHits.length).toBeGreaterThan(0);

  const deepLearningHits = persistedIndex.search("deep learning neural");
  expect(deepLearningHits.length).toBeGreaterThan(0);
  expect(deepLearningHits[0]?.title).toBe("Deep Learning");
});

// When the provider passed to ingest exposes embed(), the vector index must be
// written as part of the ingest and contain the new document.
test("ingest updates vector index when provider has embed()", async () => {
  const vaultRoot = await makeTempVault();

  // Deterministic stand-in for a real embedding model: a 32-dimensional vector
  // whose i-th component is the relative frequency of the letter at position
  // (i mod 26) of the alphabet in the lower-cased text.
  const letterFrequencyProvider: LLMProvider = {
    name: "mock-embed",
    async complete() {
      return {
        content: "",
        usage: { inputTokens: 0, outputTokens: 0 },
        stopReason: "end_turn" as const,
      };
    },
    async *stream() {},
    async embed(texts: string[]): Promise<Float32Array[]> {
      return texts.map((text) => {
        const lowered = text.toLowerCase();
        return Float32Array.from({ length: 32 }, (_, slot) => {
          const letter = String.fromCharCode(97 + (slot % 26));
          // Splitting on a single character yields (occurrences + 1) pieces.
          const occurrences = lowered.split(letter).length - 1;
          return occurrences / lowered.length;
        });
      });
    },
  };

  const sourceFile = join(vaultRoot, "quantum-ml.md");
  await writeFile(
    sourceFile,
    "# Quantum Machine Learning\n\nQuantum computing applied to machine learning tasks.",
  );

  await ingestSource(vaultRoot, sourceFile, { provider: letterFrequencyProvider });

  // Load through a new instance so the checks run against persisted data.
  const persistedVectors = new VectorIndex();
  const didLoad = await persistedVectors.load(vaultRoot);
  expect(didLoad).toBe(true);
  expect(persistedVectors.documentCount).toBe(1);

  const hits = await persistedVectors.search("quantum machine learning", letterFrequencyProvider);
  expect(hits.length).toBe(1);
  expect(hits[0]?.title).toBe("Quantum Machine Learning");
});

// A provider without embed() must still produce the BM25 search index, while
// no vector index is written at all.
test("ingest skips vector index when provider lacks embed()", async () => {
  const vaultRoot = await makeTempVault();

  // Minimal provider stub that deliberately leaves embed() undefined.
  const providerWithoutEmbed: LLMProvider = {
    name: "no-embed",
    async complete() {
      return {
        content: "",
        usage: { inputTokens: 0, outputTokens: 0 },
        stopReason: "end_turn" as const,
      };
    },
    async *stream() {},
  };

  const sourceFile = join(vaultRoot, "test-no-embed.md");
  await writeFile(sourceFile, "# No Embed\n\nContent without embeddings.");

  await ingestSource(vaultRoot, sourceFile, { provider: providerWithoutEmbed });

  // Keyword (BM25) index: present and holding exactly the one ingested source.
  const keywordIndex = new SearchIndex();
  const keywordLoaded = await keywordIndex.load(vaultRoot);
  expect(keywordLoaded).toBe(true);
  expect(keywordIndex.documentCount).toBe(1);

  // Vector index: load() reports false because nothing was written.
  const embeddingIndex = new VectorIndex();
  const vectorLoaded = await embeddingIndex.load(vaultRoot);
  expect(vectorLoaded).toBe(false);
});

test("image ingest saves binary to wiki/images/", async () => {
const root = await makeTempVault();

Expand Down
36 changes: 36 additions & 0 deletions packages/core/src/ingest/ingest.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { join } from "node:path";
import { RAW_DIR } from "../constants.js";
import { hash } from "../hash.js";
import { withLock } from "../lockfile.js";
import { SearchIndex } from "../search/engine.js";
import { VectorIndex } from "../search/vector.js";
import type { IngestResult, LLMProvider, Manifest, SourceEntry, SourceType } from "../types.js";
import { appendLog, loadManifest, saveManifest, writeImageAsset, writeRaw } from "../vault.js";
import type { Extractor } from "./extractors/interface.js";
Expand Down Expand Up @@ -154,6 +158,38 @@ export async function ingestSource(
await saveManifest(root, manifest);
await appendLog(root, "ingest", `"${extracted.title}" (${sourceType}) → raw/${relativePath}`);

// Incrementally update search indexes so the source is immediately searchable
const docPath = join(root, RAW_DIR, relativePath);
try {
const index = new SearchIndex();
await index.load(root);
index.addDocument({
path: docPath,
title: extracted.title,
content: extracted.content,
tags: options.tags,
date: now,
});
await index.save(root);
} catch {
// BM25 index update is best-effort — don't fail the ingest
}

// Incrementally update vector index if a provider with embed() is available
if (options.provider?.embed) {
try {
const vectorIndex = new VectorIndex();
await vectorIndex.load(root);
await vectorIndex.addDocument(
{ path: docPath, title: extracted.title, content: extracted.content },
options.provider,
);
await vectorIndex.save(root);
} catch {
// Vector index update is best-effort
}
}

return {
sourceId,
path: `raw/${relativePath}`,
Expand Down
Loading
Loading