From e84592a63c087c0a5bf22ad192695b817935efdd Mon Sep 17 00:00:00 2001 From: hude Date: Fri, 29 May 2026 17:38:18 +0900 Subject: [PATCH] Split docs cleanup changes --- README.md | 4 +- docs/AGENT_MEMORY_API.md | 1 - docs/AGENT_TOOL_CALLING.md | 2 - docs/RELEASE.md | 33 +---- docs/internal/LLM_WIKI_STRUCTURE.md | 8 +- docs/internal/WIKI_BROWSER_PLAN.md | 7 +- docs/internal/llm-wiki-structure.svg | 111 ----------------- docs/validation/VFS_CORRECTNESS_CHECKLIST.md | 1 - .../VFS_DEPLOYED_CANISTER_BENCHMARKS.md | 7 +- fixtures/beam/beam_evidence_sample.json | 38 ------ fixtures/beam/beam_sample.json | 38 ------ packaging/homebrew/Formula/kinic-vfs-cli.rb | 20 --- plugins/hermes/README.md | 2 +- plugins/runtime/kinic_agent_runtime/evolve.py | 2 +- scripts/bench/run_beam_bench.sh | 12 -- scripts/bench/run_beam_grounded_slice.sh | 116 ------------------ scripts/bench/run_beam_improvement_slices.sh | 48 -------- scripts/bench/run_beam_prepare.sh | 12 -- scripts/bench/run_canister_vfs_workload.sh | 5 +- 19 files changed, 17 insertions(+), 450 deletions(-) delete mode 100644 docs/internal/llm-wiki-structure.svg delete mode 100644 fixtures/beam/beam_evidence_sample.json delete mode 100644 fixtures/beam/beam_sample.json delete mode 100644 packaging/homebrew/Formula/kinic-vfs-cli.rb delete mode 100755 scripts/bench/run_beam_bench.sh delete mode 100755 scripts/bench/run_beam_grounded_slice.sh delete mode 100755 scripts/bench/run_beam_improvement_slices.sh delete mode 100755 scripts/bench/run_beam_prepare.sh diff --git a/README.md b/README.md index 92f15534..d723d003 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,14 @@ It stores durable wiki nodes in an Internet Computer canister and exposes them t Vector databases are useful for retrieving nearby text fragments from large corpora. Agent memory has a different shape. Agents need stable places for current decisions, source evidence, open questions, operating procedures, and relationships between notes. -Kinic Wiki uses a canister-backed file system as that memory layer. Organized knowledge lives under `/Wiki/...`; raw evidence lives under `/Sources/raw/...`. Agents can search it, follow paths and links, inspect recent changes, and update notes with `etag` guarded writes. +Kinic Wiki uses a canister-backed file system as that memory layer. Organized knowledge lives under `/Wiki/...`; raw evidence lives under `/Sources/raw/...`. Agents can search it, follow paths and links, and update notes with `etag` guarded writes. For many medium-sized agent workflows, structured file-system search is often more useful than embedding-only retrieval. A result is not just a similar chunk; it is a named, linked, updateable knowledge node. ## What It Provides - Browser access for public and private wiki databases -- Path-based reads, writes, search, recent nodes, and link graph inspection +- Path-based reads, writes, search, and link graph inspection - `etag` guarded edits for safer agent and operator workflows - Skill Registry packages for discovering, evaluating, and improving agent skills - Read-only Agent Memory API methods for direct canister clients diff --git a/docs/AGENT_MEMORY_API.md b/docs/AGENT_MEMORY_API.md index e802622a..ca4baf3d 100644 --- a/docs/AGENT_MEMORY_API.md +++ b/docs/AGENT_MEMORY_API.md @@ -85,5 +85,4 @@ The response returns the wiki `node_path` and refs with source path, linking pat - The Agent Memory API v1 is read-only. - Writes must use CLI commands, VFS mutation APIs, or the shared tool dispatcher. -- `recent_changes` is not part of v1. Use `recent_nodes` through the VFS API when recent live nodes are needed. - `memory_summary` is not part of v1. Use `query_context` with a summary-style task when a maintained overview is needed. diff --git a/docs/AGENT_TOOL_CALLING.md b/docs/AGENT_TOOL_CALLING.md index cef9546d..a3bb88b6 100644 --- a/docs/AGENT_TOOL_CALLING.md +++ b/docs/AGENT_TOOL_CALLING.md @@ -51,7 +51,6 @@ Current tool names: - `mkdir` - `mv` - `glob` -- `recent` - `graph_neighborhood` - `graph_links` - `incoming_links` @@ -75,7 +74,6 @@ Read-only tools are: - `skill_find` - `skill_inspect` - `skill_read` -- `recent` - `graph_neighborhood` - `graph_links` - `incoming_links` diff --git a/docs/RELEASE.md b/docs/RELEASE.md index c2118700..00ed8126 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -2,7 +2,7 @@ `kinic-vfs-cli` is published as the single operator binary for database setup, scripted writes, archive/restore, and Skill Registry maintenance. The Browser remains the primary public UI. -Primary distribution is npm. The npm package downloads GitHub Release assets and verifies SHA-256 checksums. Homebrew is optional follow-up packaging. Cargo install is a Rust-user fallback; crates.io publication is deferred. +Primary distribution is npm. The npm package downloads GitHub Release assets and verifies SHA-256 checksums. Cargo install is a Rust-user fallback; crates.io publication is deferred. ## npm @@ -74,37 +74,6 @@ tar -xzf kinic-vfs-cli-v0.1.3-macos-arm64.tar.gz ./kinic-vfs-cli --version ``` -## Homebrew - -Homebrew packaging is optional. The standard tap is `ICME-Lab/homebrew-tap`. If the tap does not exist yet, create it first. - -After a GitHub Release is available: - -1. Read the release checksum: - - ```bash - shasum -a 256 kinic-vfs-cli-v0.1.3-macos-arm64.tar.gz - ``` - -2. Copy [`../packaging/homebrew/Formula/kinic-vfs-cli.rb`](../packaging/homebrew/Formula/kinic-vfs-cli.rb) into `ICME-Lab/homebrew-tap`. - -3. Replace the placeholder `sha256` with the release checksum. - -4. Validate inside the tap repo: - - ```bash - brew audit --strict --online kinic-vfs-cli - brew install ICME-Lab/tap/kinic-vfs-cli - brew test kinic-vfs-cli - ``` - -Before release assets exist, only local syntax and style checks are expected to pass: - -```bash -ruby -c packaging/homebrew/Formula/kinic-vfs-cli.rb -brew style packaging/homebrew/Formula/kinic-vfs-cli.rb -``` - ## CI Artifacts The normal `cli-artifacts` CI job uses the same tarball layout as the release workflow, but uploads workflow artifacts instead of creating a GitHub Release. diff --git a/docs/internal/LLM_WIKI_STRUCTURE.md b/docs/internal/LLM_WIKI_STRUCTURE.md index d9bef4a9..e442fc2c 100644 --- a/docs/internal/LLM_WIKI_STRUCTURE.md +++ b/docs/internal/LLM_WIKI_STRUCTURE.md @@ -75,7 +75,7 @@ flowchart LR ### 5.3 `vfs_store` - SQLite 正本層 -- CRUD、move、append、multi edit、glob、recent、FTS search、link graph、node context、Agent Memory query、snapshot export、delta sync を集約 +- CRUD、move、append、multi edit、glob、FTS search、link graph、node context、Agent Memory query、snapshot export、delta sync を集約 - `schema.rs` で versioned migration を適用 - `fs_change_log` と `fs_path_state` により sync 差分を計算 - FTS preview 生成は ranking と分離して性能劣化を抑制 @@ -109,7 +109,7 @@ flowchart LR - `canister_health` - `read_node`, `list_nodes`, `list_children` - `write_node`, `append_node`, `edit_node`, `delete_node`, `move_node` -- `mkdir_node`, `glob_nodes`, `recent_nodes`, `multi_edit_node` +- `mkdir_node`, `glob_nodes`, `multi_edit_node` - `search_nodes`, `search_node_paths` - `read_node_context`, `incoming_links`, `outgoing_links`, `graph_links`, `graph_neighborhood` - `memory_manifest`, `query_context`, `source_evidence` @@ -190,12 +190,12 @@ sequenceDiagram | --- | --- | --- | | build | `build-vfs-canister.sh`, `build-vfs-canister-canbench.sh` | canister build | | canbench | `run_canbench_guard.sh`, `run_canbench_scale.sh`, `canbench/*.py` | canbench 集計・比較 | -| bench | `bench/run_beam_bench.sh`, `bench/run_canister_vfs_*.sh` | beam / canister workload | +| bench | `bench/run_canister_vfs_*.sh` | canister workload | | env | `setup_canbench_ci.sh` | 実行環境補助 | ### 8.3 `fixtures/` -- `fixtures/beam/`: beam sample 入力 +- 現在、永続 fixture はない ### 8.4 `artifacts/` diff --git a/docs/internal/WIKI_BROWSER_PLAN.md b/docs/internal/WIKI_BROWSER_PLAN.md index 3c8b0d3d..303a4e69 100644 --- a/docs/internal/WIKI_BROWSER_PLAN.md +++ b/docs/internal/WIKI_BROWSER_PLAN.md @@ -41,11 +41,10 @@ - `GET /api/site/[canisterId]/children?path=/Wiki/...` - `GET /api/site/[canisterId]/search-path?q=...` - `GET /api/site/[canisterId]/search?q=...` - - `GET /api/site/[canisterId]/recent?limit=...` - JSON の bigint 系値は string で返す - UI は read-only 3 ペインにする。 - - 左: Explorer / Search / Recent tabs + - 左: Explorer / Search tabs - 中央: Markdown Preview / Raw 切替 - 右: Inspector - mobile は drawer 化する desktop-first responsive @@ -77,7 +76,7 @@ URL state: - selected node: route path - `view=preview|raw` -- `tab=explorer|search|recent` +- `tab=explorer|search` - `q=...` - expanded tree state は URL に載せない @@ -102,7 +101,7 @@ URL state: - UI: - `/site/[canisterId]` が `/Wiki` を開く - `/site/[canisterId]/Wiki/...` が該当 node を開く - - tree 展開、search、recent、preview/raw 切替が動く + - tree 展開、search、preview/raw 切替が動く - inspector が metadata / links / hints を表示する - Playwright で desktop と mobile 表示を確認する diff --git a/docs/internal/llm-wiki-structure.svg b/docs/internal/llm-wiki-structure.svg deleted file mode 100644 index 9e50d39b..00000000 --- a/docs/internal/llm-wiki-structure.svg +++ /dev/null @@ -1,111 +0,0 @@ - - llm-wiki structure overview - Overview of llm-wiki workspace crates, data flow, support assets, and non-workspace directories. - - - - - - - - - - llm-wiki Structure - IC canister-backed VFS wiki base. Source of truth is remote node storage under /Wiki and /Sources. - - - Interface Layer - - vfs_cli_app - human / agent CLI - status, node ops, maintenance, skill registry - - vfs_cli_core - generic CLI core - - vfs_client - ic-agent transport + candid RPC - - - Canister / Service - - vfs_canister - IC query/update entrypoints - ic-sqlite-vfs stable memory, migrations - - vfs_runtime - VfsService boundary - thin orchestration around FsStore - - wiki_domain - wiki/source path validation - - - Storage Core - - vfs_store - SQLite source of truth - CRUD, append, edit, move, delete, glob, recent - FTS search, snapshot export, delta sync, etag - - schema.rs - versioned migration, legacy reject - - vfs_types - shared contracts for all layers - - - Support Surface - - docs/validation - correctness and benchmark contract - - scripts/ - build, canbench, beam, env helpers - - agent skills - repo-local workflow skills - kinic-wiki-ingest / lint / query - - - - - - - - Key Flows - Read / Write Path - agent or CLI → vfs_client → vfs_canister → VfsService → FsStore → SQLite/FTS - Snapshot APIs - snapshot revision, change log, removed_paths, changed_nodes, and etag conflict control - Knowledge Policy - wiki note schema authority lives in docs/internal/WIKI_CANONICALITY.md, not in generic VFS crates - - - Top-Level Repository Areas - README.md: public entry and quick start - crates/: runtime implementation core - docs/: internal policy and validation docs - fixtures/: beam samples - artifacts/: benchmark output and wiki backups - - - Not in Current Workspace Build - crates/wiki_agent_schema - crates/wiki_http_adapter - crates/wiki_runtime - crates/wiki_search - crates/wiki_types - empty dirs only; safe to treat as inactive for current architecture reading - diff --git a/docs/validation/VFS_CORRECTNESS_CHECKLIST.md b/docs/validation/VFS_CORRECTNESS_CHECKLIST.md index daf91a12..064f0e1d 100644 --- a/docs/validation/VFS_CORRECTNESS_CHECKLIST.md +++ b/docs/validation/VFS_CORRECTNESS_CHECKLIST.md @@ -14,7 +14,6 @@ This checklist records what the current FS-first contract already covers, what w | `move_node` / overwrite | `fs_store_vfs`, `fs_store_sync`, `vfs_canister` | covered | | `list_nodes` shallow / recursive / virtual directory | `fs_store_basic`, `fs_store_scale`, `vfs_canister` | covered | | deep `glob_nodes("**/*.md")` | `fs_store_vfs`, `fs_store_scale` | covered | -| `recent_nodes` | `fs_store_vfs`, `vfs_canister` | covered | | `search_nodes` prefix filtering / deleted node suppression | `fs_store_basic`, `fs_store_scale`, `tests_sync_contract` | covered | | `export_snapshot` stability | `fs_store_basic`, `fs_store_sync`, `vfs_canister` | covered | | `fetch_updates` empty delta | `fs_store_sync`, `vfs_canister` | covered | diff --git a/docs/validation/VFS_DEPLOYED_CANISTER_BENCHMARKS.md b/docs/validation/VFS_DEPLOYED_CANISTER_BENCHMARKS.md index 4d78af40..248e9d3b 100644 --- a/docs/validation/VFS_DEPLOYED_CANISTER_BENCHMARKS.md +++ b/docs/validation/VFS_DEPLOYED_CANISTER_BENCHMARKS.md @@ -13,10 +13,10 @@ Primary metrics: | Bench | Role | Main targets | | --- | --- | --- | -| `canister_vfs_workload` | repeated API workload benchmark | `create`, `update`, `append`, `edit`, `move_same_dir`, `move_cross_dir`, `delete`, `read`, `list`, `search`, `mkdir`, `glob`, `recent`, `multi_edit` | +| `canister_vfs_workload` | repeated API workload benchmark | `create`, `update`, `append`, `edit`, `move_same_dir`, `move_cross_dir`, `delete`, `read`, `list`, `search`, `mkdir`, `glob`, `multi_edit` | | `canister_vfs_latency` | single-update latency benchmark | `write_node`, `append_node` | -`query` is treated as a documentation category covering `read`, `list`, `search`, `mkdir`, `glob`, and `recent`, not as a separate method name. +`query` is treated as a documentation category covering `read`, `list`, `search`, `mkdir`, and `glob`, not as a separate method name. ## Fixed Conditions @@ -28,7 +28,7 @@ Primary metrics: | Transport | `ic-agent` | | Cycles source | `icp canister status --json` | | Update cycles scope | `isolated_single_op` | -| Query cycles scope | `isolated_single_op` for `read`, `list`, `search`, `glob`, `recent` | +| Query cycles scope | `isolated_single_op` for `read`, `list`, `search`, `glob` | | Validation-only query scope | `scenario_total` for `mkdir` | `isolated_single_op` separates setup from the measured call and uses `measured_cycles_delta` plus `cycles_per_measured_request` as the main table. @@ -102,7 +102,6 @@ If cycles cannot be collected, the benchmark should continue. Cycles fields beco | `search` | `search` | `search_nodes` | run a hit-producing search against a seeded corpus | | `mkdir` | `mkdir` | `mkdir_node` | validate a unique path per iteration | | `glob` | `glob` | `glob_nodes` | run `pattern=node-*.md` within the bench prefix | -| `recent` | `recent` | `recent_nodes` | read recent nodes with `limit = min(10, file_count)` | | `multi_edit` | `multi_edit` | `multi_edit_node` | apply two atomic token replacements | ## Update ACK Contract diff --git a/fixtures/beam/beam_evidence_sample.json b/fixtures/beam/beam_evidence_sample.json deleted file mode 100644 index 857c937c..00000000 --- a/fixtures/beam/beam_evidence_sample.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "100K": [ - { - "conversation_id": "beam-evidence-1", - "conversation_seed": { - "category": "General", - "title": "Calendar planning" - }, - "narratives": "A short planning conversation about a meeting date.", - "user_profile": { - "user_info": "Sample user profile" - }, - "conversation_plan": "Discuss one meeting date and confirm it.", - "user_questions": [ - { - "messages": [ - [ - "Can you help me remember the meeting date?" - ] - ] - } - ], - "chat": [ - [ - { - "role": "user", - "content": "Please remember that the meeting is on March 15, 2024." - }, - { - "role": "assistant", - "content": "Understood. I will remember March 15, 2024." - } - ] - ], - "probing_questions": "{'factoid':[{'question':'When is the meeting?','answer':'March 15, 2024','gold_answers':['March 15, 2024'],'gold_paths':['facts.md'],'gold_spans':['March 15, 2024'],'tags':['factoid','facts']},{'question':'What was the conversation plan?','answer':'Discuss one meeting date and confirm it.','gold_answers':['Discuss one meeting date and confirm it.'],'gold_paths':['plan.md'],'gold_spans':['Discuss one meeting date and confirm it.'],'tags':['factoid','plan']}],'temporal_reasoning':[{'question':'What happened in the first turn?','answer':'Please remember that the meeting is on March 15, 2024.','gold_answers':['Please remember that the meeting is on March 15, 2024.'],'gold_paths':['events.md'],'gold_spans':['Turn 0001 user: Please remember that the meeting is on March 15, 2024.'],'as_of':'2026-04-16T00:00:00+09:00','tags':['temporal','events']}],'abstention':[{'question':'What city was the meeting in?','answer':'insufficient evidence','expects_abstention':true,'gold_answers':['insufficient evidence'],'tags':['abstention']}]}" - } - ] -} diff --git a/fixtures/beam/beam_sample.json b/fixtures/beam/beam_sample.json deleted file mode 100644 index 1f19ce51..00000000 --- a/fixtures/beam/beam_sample.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "100K": [ - { - "conversation_id": "beam-sample-1", - "conversation_seed": { - "category": "General", - "title": "Calendar planning" - }, - "narratives": "A short planning conversation about a meeting date.", - "user_profile": { - "user_info": "Sample user profile" - }, - "conversation_plan": "Discuss one meeting date and confirm it.", - "user_questions": [ - { - "messages": [ - [ - "Can you help me remember the meeting date?" - ] - ] - } - ], - "chat": [ - [ - { - "role": "user", - "content": "Please remember that the meeting is on March 15, 2024." - }, - { - "role": "assistant", - "content": "Understood. I will remember March 15, 2024." - } - ] - ], - "probing_questions": "{'abstention':[{'question':'What city was the meeting in?','answer':'insufficient evidence','expects_abstention':true,'gold_answers':['insufficient evidence'],'tags':['abstention']}],'factoid':[{'question':'When is the meeting?','answer':'March 15, 2024','gold_answers':['March 15, 2024'],'gold_paths':['facts.md'],'gold_spans':['March 15, 2024'],'tags':['factoid','facts']},{'question':'What was the conversation plan?','answer':'Discuss one meeting date and confirm it.','gold_answers':['Discuss one meeting date and confirm it.'],'gold_paths':['plan.md'],'gold_spans':['Discuss one meeting date and confirm it.'],'tags':['factoid','plan']}],'temporal_reasoning':[{'question':'What happened in the first turn?','answer':'Please remember that the meeting is on March 15, 2024.','gold_answers':['Please remember that the meeting is on March 15, 2024.'],'gold_paths':['events.md'],'gold_spans':['Turn 0001 user: Please remember that the meeting is on March 15, 2024.'],'as_of':'2026-04-16T00:00:00+09:00','tags':['temporal','events']}]}" - } - ] -} diff --git a/packaging/homebrew/Formula/kinic-vfs-cli.rb b/packaging/homebrew/Formula/kinic-vfs-cli.rb deleted file mode 100644 index 5ebf8759..00000000 --- a/packaging/homebrew/Formula/kinic-vfs-cli.rb +++ /dev/null @@ -1,20 +0,0 @@ -class KinicVfsCli < Formula - desc "Operator CLI for Kinic VFS-backed wiki databases and Skill Registry packages" - homepage "https://github.com/ICME-Lab/kinic-wiki" - version "0.1.1" - - if OS.mac? && Hardware::CPU.arm? - url "https://github.com/ICME-Lab/kinic-wiki/releases/download/v#{version}/kinic-vfs-cli-v#{version}-macos-arm64.tar.gz" - sha256 "232a81c1a3ecd0b7d1c3e189e276fe5bf56fb546b6d14900bdbfee4cce9e5b24" - else - odie "kinic-vfs-cli v#{version} formula currently supports macOS arm64 only" - end - - def install - bin.install "kinic-vfs-cli" - end - - test do - assert_match "Usage:", shell_output("#{bin}/kinic-vfs-cli --help") - end -end diff --git a/plugins/hermes/README.md b/plugins/hermes/README.md index 9206ec82..f6da0751 100644 --- a/plugins/hermes/README.md +++ b/plugins/hermes/README.md @@ -71,7 +71,7 @@ Metrics route: 確認: ```bash -target/debug/kinic-vfs-cli recent-nodes --path /Sources/skill-runs --limit 20 --json +target/debug/kinic-vfs-cli list-nodes --prefix /Sources/skill-runs --recursive --json ls ~/.kinic/pending-runs tail ~/.kinic/hermes-plugin.log ``` diff --git a/plugins/runtime/kinic_agent_runtime/evolve.py b/plugins/runtime/kinic_agent_runtime/evolve.py index 744e2884..f9b609c3 100644 --- a/plugins/runtime/kinic_agent_runtime/evolve.py +++ b/plugins/runtime/kinic_agent_runtime/evolve.py @@ -290,7 +290,7 @@ def read_run_paths(cli: str, paths: list[str]) -> list[dict[str, str]]: def read_corrections(cli: str, skill_id: str) -> list[dict[str, str]]: - output = run_cli(cli, "recent-nodes", "--path", f"/Sources/skill-runs/{skill_id}", "--limit", "100", "--json") + output = run_cli(cli, "list-nodes", "--prefix", f"/Sources/skill-runs/{skill_id}", "--recursive", "--json") entries = json.loads(output) paths = [ entry.get("path", "") diff --git a/scripts/bench/run_beam_bench.sh b/scripts/bench/run_beam_bench.sh deleted file mode 100755 index f79fa369..00000000 --- a/scripts/bench/run_beam_bench.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Where: scripts/bench/run_beam_bench.sh -# What: Run the read-only BEAM-derived retrieval benchmark binary. -# Why: Eval must stay separate from namespace preparation and canister writes. - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -cd "${REPO_ROOT}" -cargo run -p kinic-vfs-cli --bin beam_bench -- "$@" diff --git a/scripts/bench/run_beam_grounded_slice.sh b/scripts/bench/run_beam_grounded_slice.sh deleted file mode 100755 index 6041086f..00000000 --- a/scripts/bench/run_beam_grounded_slice.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Where: scripts/bench/run_beam_grounded_slice.sh -# What: Prepare then run one grounded QA slice against the read-only BEAM eval harness. -# Why: Test runs should use one command so prepare/eval order is never skipped. - -if [[ $# -lt 5 ]]; then - echo "usage: $0 [extra args...]" >&2 - exit 1 -fi - -SLICE="$1" -CANISTER_ID="$2" -DATASET_PATH="$3" -OUTPUT_DIR="$4" -NAMESPACE="$5" -shift 5 - -SPLIT="100K" -LIMIT_ARGS=(--limit 20) -PASSTHROUGH_ARGS=() -while [[ $# -gt 0 ]]; do - case "$1" in - --split) - SPLIT="$2" - shift 2 - ;; - --limit) - LIMIT_ARGS=("$1" "$2") - shift 2 - ;; - *) - PASSTHROUGH_ARGS+=("$1") - shift - ;; - esac -done - -ARGS=( - --local - --canister-id "$CANISTER_ID" - --dataset-path "$DATASET_PATH" - --split "$SPLIT" - --output-dir "$OUTPUT_DIR" - --eval-mode retrieve-and-extract - --top-k 3 - --parallelism 1 - --namespace "$NAMESPACE" -) -ARGS+=("${LIMIT_ARGS[@]}") - -case "$SLICE" in - information-extraction) - ARGS+=(--include-question-type information_extraction) - ;; - temporal-reasoning) - ARGS+=(--include-question-type temporal_reasoning) - ;; - event-ordering) - ARGS+=(--include-question-type event_ordering) - ;; - instruction-following) - ARGS+=(--include-question-type instruction_following) - ;; - preference-following) - ARGS+=(--include-question-type preference_following) - ;; - knowledge-update) - ARGS+=(--include-question-type knowledge_update) - ;; - contradiction-resolution) - ARGS+=(--include-question-type contradiction_resolution) - ;; - summarization) - ARGS+=(--include-question-type summarization) - ;; - multi-session-reasoning) - ARGS+=(--include-question-type multi_session_reasoning) - ;; - abstention) - ARGS+=(--include-question-type abstention) - ;; - facts) - ARGS+=(--include-question-type information_extraction) - ;; - temporal) - ARGS+=(--include-question-type temporal_reasoning) - ;; - plan) - ARGS+=(--include-question-class factoid --include-tag plan) - ;; - *) - echo "unknown slice: $SLICE" >&2 - exit 1 - ;; -esac - -BENCH_DIR="$(cd "$(dirname "$0")" && pwd)" -PREPARE_ARGS=( - --local - --canister-id "$CANISTER_ID" - --dataset-path "$DATASET_PATH" - --split "$SPLIT" - --namespace "$NAMESPACE" -) -if [[ ${#LIMIT_ARGS[@]} -gt 0 ]]; then - PREPARE_ARGS+=("${LIMIT_ARGS[@]}") -fi -BENCH_ARGS=("${ARGS[@]}") -if [[ ${#PASSTHROUGH_ARGS[@]} -gt 0 ]]; then - BENCH_ARGS+=("${PASSTHROUGH_ARGS[@]}") -fi - -bash "${BENCH_DIR}/run_beam_prepare.sh" "${PREPARE_ARGS[@]}" -bash "${BENCH_DIR}/run_beam_bench.sh" "${BENCH_ARGS[@]}" diff --git a/scripts/bench/run_beam_improvement_slices.sh b/scripts/bench/run_beam_improvement_slices.sh deleted file mode 100755 index c8329cfa..00000000 --- a/scripts/bench/run_beam_improvement_slices.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Where: scripts/bench/run_beam_improvement_slices.sh -# What: Run the BEAM improvement slices in the recommended order with fixed names. -# Why: Prompt-tuning checks should rerun the same question-type slices without ad hoc command drift. - -if [[ $# -lt 3 ]]; then - echo "usage: $0 [namespace] [extra args...]" >&2 - exit 1 -fi - -CANISTER_ID="$1" -DATASET_PATH="$2" -OUTPUT_ROOT="$3" -NAMESPACE="${4:-beam-full-reset}" - -if [[ $# -ge 4 ]]; then - shift 4 -else - shift 3 -fi - -EXTRA_ARGS=("$@") -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -SLICES=( - preference-following - information-extraction - summarization - multi-session-reasoning - contradiction-resolution - temporal-reasoning -) - -for slice in "${SLICES[@]}"; do - ARGS=( - "$slice" - "$CANISTER_ID" - "$DATASET_PATH" - "${OUTPUT_ROOT}/${slice}" - "$NAMESPACE" - ) - if [[ ${#EXTRA_ARGS[@]} -gt 0 ]]; then - ARGS+=("${EXTRA_ARGS[@]}") - fi - bash "${SCRIPT_DIR}/run_beam_grounded_slice.sh" "${ARGS[@]}" -done diff --git a/scripts/bench/run_beam_prepare.sh b/scripts/bench/run_beam_prepare.sh deleted file mode 100755 index bf10d3ab..00000000 --- a/scripts/bench/run_beam_prepare.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Where: scripts/bench/run_beam_prepare.sh -# What: Prepare a BEAM benchmark namespace before read-only eval. -# Why: Note import and index sync must happen outside `beam_bench`. - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" - -cd "${REPO_ROOT}" -cargo run -p kinic-vfs-cli --bin beam_prepare -- "$@" diff --git a/scripts/bench/run_canister_vfs_workload.sh b/scripts/bench/run_canister_vfs_workload.sh index 81205d88..2a09f0f3 100644 --- a/scripts/bench/run_canister_vfs_workload.sh +++ b/scripts/bench/run_canister_vfs_workload.sh @@ -89,14 +89,14 @@ node -e ' const pickIterations = (operation, size) => { if (operation === "list") return Number(process.env.WORKLOAD_LIST_ITERATIONS || 100); if (operation === "search") return Number(process.env.WORKLOAD_SEARCH_ITERATIONS || 50); - if (operation === "mkdir" || operation === "glob" || operation === "recent") { + if (operation === "mkdir" || operation === "glob") { return Number(process.env.WORKLOAD_QUERY_ITERATIONS || process.env.WORKLOAD_LIST_ITERATIONS || 100); } return size.iterations; }; const defaultOperations = [ "create", "update", "append", "edit", "move_same_dir", "move_cross_dir", "delete", "read", "list", "search", - "mkdir", "glob", "recent", "multi_edit" + "mkdir", "glob", "multi_edit" ]; const operations = parseList(process.env.WORKLOAD_OPERATIONS, defaultOperations); const directoryShapes = parseList(process.env.WORKLOAD_DIRECTORY_SHAPES, ["flat"]); @@ -244,7 +244,6 @@ write_failed_raw() { search: { openai_tool: "search", openai_tool_variant: null }, mkdir: { openai_tool: "mkdir", openai_tool_variant: null }, glob: { openai_tool: "glob", openai_tool_variant: null }, - recent: { openai_tool: "recent", openai_tool_variant: null }, multi_edit: { openai_tool: "multi_edit", openai_tool_variant: null } }; const oa = openaiMap[op] ?? { openai_tool: op, openai_tool_variant: null };