Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/bookmark-classify-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,10 @@ export interface LlmClassifyResult {
}

export async function classifyWithLlm(
options: { engine: ResolvedEngine; onBatch?: (done: number, total: number) => void },
options: { engine: ResolvedEngine; limit?: number; onBatch?: (done: number, total: number) => void },
): Promise<LlmClassifyResult> {
const { engine } = options;
const limitClause = options.limit && options.limit > 0 ? ` LIMIT ${Math.floor(options.limit)}` : '';

const dbPath = twitterBookmarksIndexPath();
const db = await openDb(dbPath);
Expand All @@ -122,7 +123,7 @@ export async function classifyWithLlm(
const rows = db.exec(
`SELECT id, text, author_handle, links_json FROM bookmarks
WHERE primary_category = 'unclassified' OR primary_category IS NULL
ORDER BY RANDOM()`
ORDER BY RANDOM()${limitClause}`
);

if (!rows.length || !rows[0].values.length) {
Expand Down Expand Up @@ -223,9 +224,10 @@ ${items}`;
}

export async function classifyDomainsWithLlm(
options: { engine: ResolvedEngine; all?: boolean; onBatch?: (done: number, total: number) => void },
options: { engine: ResolvedEngine; all?: boolean; limit?: number; onBatch?: (done: number, total: number) => void },
): Promise<LlmClassifyResult> {
const { engine } = options;
const limitClause = options.limit && options.limit > 0 ? ` LIMIT ${Math.floor(options.limit)}` : '';

const dbPath = twitterBookmarksIndexPath();
const db = await openDb(dbPath);
Expand All @@ -240,7 +242,7 @@ export async function classifyDomainsWithLlm(
: 'primary_domain IS NULL';
const rows = db.exec(
`SELECT id, text, author_handle, categories FROM bookmarks
WHERE ${where} ORDER BY RANDOM()`
WHERE ${where} ORDER BY RANDOM()${limitClause}`
);

if (!rows.length || !rows[0].values.length) {
Expand Down
19 changes: 12 additions & 7 deletions src/bookmarks-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { getTwitterBookmarksStatus, latestBookmarkSyncAt } from './bookmarks.js'
import { buildIndex } from './bookmarks-db.js';
import { loadTwitterOAuthToken } from './xauth.js';
import { syncBookmarksGraphQL, type SyncProgress } from './graphql-bookmarks.js';
import { renderStatusSections } from './status-render.js';

export interface BookmarkEnableResult {
synced: boolean;
Expand Down Expand Up @@ -59,13 +60,17 @@ export async function getBookmarkStatusView(): Promise<BookmarkStatusView> {
}

export function formatBookmarkStatus(view: BookmarkStatusView): string {
return [
'Bookmarks',
` bookmarks: ${view.bookmarkCount}`,
` last updated: ${view.lastUpdated ?? 'never'}`,
` sync mode: ${view.mode}`,
` cache: ${view.cachePath}`,
].join('\n');
return renderStatusSections([
{
title: 'Bookmarks',
lines: [
{ label: 'bookmarks:', value: String(view.bookmarkCount) },
{ label: 'last updated:', value: view.lastUpdated ?? 'never' },
{ label: 'sync mode:', value: view.mode },
{ label: 'cache:', value: view.cachePath },
],
},
]);
}

export function formatBookmarkSummary(view: BookmarkStatusView): string {
Expand Down
15 changes: 11 additions & 4 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -966,25 +966,28 @@ export function buildCli() {
.command('classify')
.description('Classify bookmarks by category and domain using LLM (requires claude or codex CLI)')
.option('--regex', 'Use simple regex classification instead of LLM')
.option('--limit <n>', 'Only classify up to N bookmarks (useful for testing)', (v: string) => Number(v))
.addOption(engineOption())
.action(safe(async (options) => {
if (!requireData()) return;
if (options.regex) {
process.stderr.write('Classifying bookmarks (regex)...\n');
const result = await classifyAndRebuild();
console.log(`Indexed ${result.recordCount} bookmarks \u2192 ${result.dbPath}`);
console.log(`Indexed ${result.recordCount} bookmarks → ${result.dbPath}`);
console.log(formatClassificationSummary(result.summary));
} else {
const engine = await resolveEngine({ override: options.engine ? String(options.engine) : undefined });
const limit = Number.isFinite(options.limit) && options.limit > 0 ? options.limit : undefined;

let catStart = Date.now();
process.stderr.write('Classifying categories with LLM (batches of 50, ~2 min per batch)...\n');
const catResult = await classifyWithLlm({
engine,
limit,
onBatch: (done: number, total: number) => {
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
const elapsed = Math.round((Date.now() - catStart) / 1000);
process.stderr.write(` Categories: ${done}/${total} (${pct}%) \u2502 ${elapsed}s elapsed\n`);
process.stderr.write(` Categories: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
},
});
console.log(`\nEngine: ${catResult.engine}`);
Expand All @@ -995,10 +998,11 @@ export function buildCli() {
const domResult = await classifyDomainsWithLlm({
engine,
all: false,
limit,
onBatch: (done: number, total: number) => {
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
const elapsed = Math.round((Date.now() - domStart) / 1000);
process.stderr.write(` Domains: ${done}/${total} (${pct}%) \u2502 ${elapsed}s elapsed\n`);
process.stderr.write(` Domains: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
},
});
console.log(`\nDomains: ${domResult.classified}/${domResult.totalUnclassified} classified`);
Expand All @@ -1011,19 +1015,22 @@ export function buildCli() {
.command('classify-domains')
.description('Classify bookmarks by subject domain using LLM (ai, finance, etc.)')
.option('--all', 'Re-classify all bookmarks, not just missing')
.option('--limit <n>', 'Only classify up to N bookmarks (useful for testing)', (v: string) => Number(v))
.addOption(engineOption())
.action(safe(async (options) => {
if (!requireData()) return;
const engine = await resolveEngine({ override: options.engine ? String(options.engine) : undefined });
const limit = Number.isFinite(options.limit) && options.limit > 0 ? options.limit : undefined;
const start = Date.now();
process.stderr.write('Classifying bookmark domains with LLM (batches of 50, ~2 min per batch)...\n');
const result = await classifyDomainsWithLlm({
engine,
all: options.all ?? false,
limit,
onBatch: (done: number, total: number) => {
const pct = total > 0 ? Math.round((done / total) * 100) : 0;
const elapsed = Math.round((Date.now() - start) / 1000);
process.stderr.write(` Domains: ${done}/${total} (${pct}%) \u2502 ${elapsed}s elapsed\n`);
process.stderr.write(` Domains: ${done}/${total} (${pct}%) │ ${elapsed}s elapsed\n`);
},
});
console.log(`\nDomains: ${result.classified}/${result.totalUnclassified} classified`);
Expand Down
Loading