diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 93eb48f..6f578cc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -437,8 +437,8 @@ { "name": "skillopt", "source": "./commands/skillopt", - "description": "0.3.0 (2026-06-07) EXECUTABLE CLI — `/skillopt` now runs directly via a `!`-exec dispatcher (`bin/skillopt.sh`, prefs-style: resolves its own cached install dir, so the user never types a long path). `/skillopt train` runs the loop in one token; `/skillopt doctor|ckpts|consume |help` all dispatch. Honest 0-edit runs reported as such; claude -p clarified = subscription (NOT metered). 0.2.0 (2026-06-07) sidecar-OWNED env adapter — the only domain code (run a task + score it) now ships IN the plugin at `commands/skillopt/examples//`, NOT a clone of the upstream package; `bin/skillopt_run.py` injects sidecar adapters into the upstream hard-coded `_ENV_REGISTRY` at runtime (additive · survives `_register_builtins`) then runs the upstream trainer. Bundled reference `examples/toyqa/` = 4-item exact-match QA proving the loop runs end-to-end on local `claude -p` (no API key, no external data — target+optimizer both shell out to the Claude Code CLI = subscription, NOT metered); `examples/_base/default.yaml` vendors the upstream base config so examples are self-contained (plain `pip install skillopt` ships no `configs/`). Verified: baseline→rollout→reflect(on failures)→gate→test execute against real claude calls; an edit lands only when the optimizer judges a failure generalizable AND the held-out gate improves (no forced edit). 0.1.0 (2026-06-07) initial — /skillopt drives SkillOpt (microsoft/SkillOpt · `pip install skillopt`, arXiv:2605.23904), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (DL analogy: skill.md = weights · rollout = forward · reflect = backprop · gate = validation early-stop). Subverbs — doctor (pip pkg + claude CLI + harness wiring readiness) · ckpts (list the package's bundled pretrained skill.md artifacts) · consume (load a trained skill into THIS session as additive system guidance) · train (run the loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec · CLAUDE_SETTING_SOURCES=user,project so the sidecar tapes ride along as FIXED scaffolding; refuses to run without a real scoring env adapter — no fabricated scores) · help. HARD INVARIANT: only the skill.md is optimized — the model, the governance tapes (= SkillOpt's fixed prompts/*_system.md), and every *-guard safety hook stay UNCHANGED; the trained skill is a SEPARATE per-domain file injected via --append-system-prompt; /skillopt never edits a .tape or a guard (a held-out UTILITY gate is not a SAFETY check — kept orthogonal). Wraps the upstream pip CLI; does not vendor or fork it. Companion: microsoft/SkillLens (arXiv:2605.23899).", - "version": "0.3.0" + "description": "0.4.0 (2026-06-07) BACKGROUND TRAIN + HARDER EXAMPLE — `/skillopt train --bg` detaches the run (nohup → log under ~/.sidecar/skillopt/), returns immediately; `/skillopt status` shows running-state + score/step progress, `/skillopt log` tails. The bundled `examples/toyqa` dataset swapped to 6 format-sensitive QA items (chemical symbols, ISO codes, rounding…) that an EMPTY skill answers in a full sentence → STRICT exact-match fails → a real learning gradient (the optimizer learns a 'reply with only the value' rule, then the held-out gate rises); train_size 6 · batch 3 · sel 5 for stronger signal. 0.3.0 (2026-06-07) EXECUTABLE CLI — `/skillopt` now runs directly via a `!`-exec dispatcher (`bin/skillopt.sh`, prefs-style: resolves its own cached install dir, so the user never types a long path). `/skillopt train` runs the loop in one token; `/skillopt doctor|ckpts|consume |help` all dispatch. Honest 0-edit runs reported as such; claude -p clarified = subscription (NOT metered). 0.2.0 (2026-06-07) sidecar-OWNED env adapter — the only domain code (run a task + score it) now ships IN the plugin at `commands/skillopt/examples//`, NOT a clone of the upstream package; `bin/skillopt_run.py` injects sidecar adapters into the upstream hard-coded `_ENV_REGISTRY` at runtime (additive · survives `_register_builtins`) then runs the upstream trainer. Bundled reference `examples/toyqa/` = 4-item exact-match QA proving the loop runs end-to-end on local `claude -p` (no API key, no external data — target+optimizer both shell out to the Claude Code CLI = subscription, NOT metered); `examples/_base/default.yaml` vendors the upstream base config so examples are self-contained (plain `pip install skillopt` ships no `configs/`). Verified: baseline→rollout→reflect(on failures)→gate→test execute against real claude calls; an edit lands only when the optimizer judges a failure generalizable AND the held-out gate improves (no forced edit). 0.1.0 (2026-06-07) initial — /skillopt drives SkillOpt (microsoft/SkillOpt · `pip install skillopt`, arXiv:2605.23904), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (DL analogy: skill.md = weights · rollout = forward · reflect = backprop · gate = validation early-stop). Subverbs — doctor (pip pkg + claude CLI + harness wiring readiness) · ckpts (list the package's bundled pretrained skill.md artifacts) · consume (load a trained skill into THIS session as additive system guidance) · train (run the loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec · CLAUDE_SETTING_SOURCES=user,project so the sidecar tapes ride along as FIXED scaffolding; refuses to run without a real scoring env adapter — no fabricated scores) · help. HARD INVARIANT: only the skill.md is optimized — the model, the governance tapes (= SkillOpt's fixed prompts/*_system.md), and every *-guard safety hook stay UNCHANGED; the trained skill is a SEPARATE per-domain file injected via --append-system-prompt; /skillopt never edits a .tape or a guard (a held-out UTILITY gate is not a SAFETY check — kept orthogonal). Wraps the upstream pip CLI; does not vendor or fork it. Companion: microsoft/SkillLens (arXiv:2605.23899).", + "version": "0.4.0" }, { "name": "sidecar", diff --git a/CHANGELOG.md b/CHANGELOG.md index 8635767..eb625cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ For the full audit trail, see `git log`. --- +## 2026-06-07 — 🎓 skillopt 0.4.0 — 백그라운드 학습 + 더 어려운 예제 (SKILLOPT.easy §8 구현) + +SKILLOPT.easy.md 의 "더 다듬을 거리" 2개를 구현. + +- 🌙 **백그라운드 학습** — `/skillopt train --bg` 가 학습을 detached(nohup)로 던지고 + 즉시 반환(로그 `~/.sidecar/skillopt/train-.log`). `/skillopt status` 가 실행 + 여부 + 점수/스텝 진행을 보여주고, `/skillopt log` 가 tail. (세탁기 돌려놓고 딴 일) +- 🎯 **더 어려운 예제 데이터** — `examples/toyqa` 를 format-sensitive 6문항(화학기호· + ISO 코드·반올림·과거형…)으로 교체. 빈 스킬은 문장으로 답해 STRICT exact-match 가 + 틀림 → 실제 학습 gradient 발생(옵티마이저가 "값만 답하기" 규칙을 배우면 held-out + gate 상승). train_size 6 · batch 3 · sel 5 로 신호 강화. + ## 2026-06-07 — 🎓 skillopt 0.3.0 — 실행형 CLI (`/skillopt train` 한 줄) 긴 python 경로 손입력 제거. `/skillopt`가 이제 `!`-exec 디스패처 diff --git a/commands/skillopt/.claude-plugin/plugin.json b/commands/skillopt/.claude-plugin/plugin.json index 4bbd78b..fcd9954 100644 --- a/commands/skillopt/.claude-plugin/plugin.json +++ b/commands/skillopt/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "skillopt", "description": "/skillopt — drive SkillOpt (microsoft/SkillOpt · `pip install skillopt`), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (the DL analogy: the skill.md is the 'weights', rollout=forward, reflect=backprop, gate=validation early-stop). Subverbs — doctor (install + readiness check: pip pkg + claude CLI) · ckpts (list the bundled pretrained skill.md artifacts) · consume (load a trained skill into THIS session as system guidance) · train (run the optimization loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec, CLAUDE_SETTING_SOURCES=user,project; needs a scoring env adapter) · help. ONLY the skill.md changes — the model, the sidecar governance tapes (= SkillOpt's fixed prompts/*_system.md scaffolding), and the *-guard safety hooks all stay fixed. Never auto-edits governance; it produces a SEPARATE per-domain skill.md the harness injects via --append-system-prompt. Wraps the upstream pip CLI; does not vendor it.", - "version": "0.3.0", + "version": "0.4.0", "author": { "name": "dancinlab" }, diff --git a/commands/skillopt/bin/skillopt.sh b/commands/skillopt/bin/skillopt.sh index c98d4b6..9698159 100755 --- a/commands/skillopt/bin/skillopt.sh +++ b/commands/skillopt/bin/skillopt.sh @@ -37,13 +37,49 @@ consume() { echo "===== END SKILL ($(grep -cE '^[-*] ' "$f" 2>/dev/null || echo 0) rules) =====" } +LOGDIR="${SKILLOPT_LOG:-$HOME/.sidecar/skillopt}" + train() { _have_pkg || { echo "skillopt not installed → pip install skillopt (then /skillopt train)"; return 1; } command -v claude >/dev/null 2>&1 || echo "⚠ claude CLI missing — the run will fail at the first rollout." + # Background mode: detach, log to a file, return immediately (watch with /skillopt status). + if [ "${1:-}" = "--bg" ] || [ "${1:-}" = "-b" ]; then + shift + mkdir -p "$LOGDIR" + local log="$LOGDIR/train-$(date +%Y%m%d-%H%M%S).log" + nohup "$PY" "$HERE/skillopt_run.py" "$@" >"$log" 2>&1 & + local pid=$! + echo "$pid" > "$LOGDIR/train.pid" + echo "🌙 background train started — pid $pid" + echo " log : $log" + echo " watch : /skillopt status · tail: /skillopt log" + return 0 + fi echo "▶ launching SkillOpt train via the in-plugin launcher (registers sidecar envs)…" exec "$PY" "$HERE/skillopt_run.py" "$@" } +_latest_log() { ls -1t "$LOGDIR"/train-*.log 2>/dev/null | head -1; } + +status() { + local pid="" alive="no" + [ -f "$LOGDIR/train.pid" ] && pid="$(cat "$LOGDIR/train.pid" 2>/dev/null)" + [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null && alive="yes" + local log; log="$(_latest_log)" + echo "🌙 /skillopt status" + echo " running : $alive${pid:+ (pid $pid)}" + echo " log : ${log:-(none — run: /skillopt train --bg)}" + [ -n "$log" ] || return 0 + echo " ── progress (score / step lines) ──" + grep -aE 'selection hard|gate\[|STEP |EPOCH |best skill|new best|Test Results|done\]' "$log" 2>/dev/null | tail -8 | sed 's/^/ /' + echo " ── tail ──"; tail -4 "$log" | sed 's/^/ /' +} + +log_cmd() { + local log; log="$(_latest_log)" + [ -n "$log" ] && tail -40 "$log" || echo "no train log yet — run: /skillopt train --bg" +} + usage() { cat <<'EOF' /skillopt — train a skill document for a frozen Claude Code agent (SkillOpt driver) @@ -53,6 +89,9 @@ usage() { /skillopt ckpts list bundled pretrained skill.md /skillopt consume print a trained skill (agent adopts it this session) /skillopt train [--config X] run rollout→reflect→edit→gate (default: examples/toyqa) + /skillopt train --bg run in the background (returns immediately) + /skillopt status background run state + score/step progress + /skillopt log tail the latest train log /skillopt help this text claude -p = your Claude Code subscription (NO metered API cost). pip install skillopt first. @@ -65,6 +104,8 @@ case "$sub" in ckpts) ckpts ;; consume) consume "$@" ;; train) train "$@" ;; + status) status ;; + log) log_cmd ;; help|-h|--help) usage ;; *.md) consume "$sub" ;; # bare path → consume *) echo "unknown subverb: $sub"; echo; usage; exit 2 ;; diff --git a/commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc b/commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..687f75e Binary files /dev/null and b/commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc differ diff --git a/commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc b/commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc new file mode 100644 index 0000000..6c12eac Binary files /dev/null and b/commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc differ diff --git a/commands/skillopt/examples/toyqa/config.yaml b/commands/skillopt/examples/toyqa/config.yaml index c069956..d7cfbc6 100644 --- a/commands/skillopt/examples/toyqa/config.yaml +++ b/commands/skillopt/examples/toyqa/config.yaml @@ -13,8 +13,8 @@ model: train: num_epochs: 1 - train_size: 4 - batch_size: 2 + train_size: 6 + batch_size: 3 accumulation: 1 seed: 42 @@ -34,7 +34,7 @@ optimizer: use_meta_skill: false evaluation: - sel_env_num: 3 + sel_env_num: 5 test_env_num: 0 env: diff --git a/commands/skillopt/examples/toyqa/dataloader.py b/commands/skillopt/examples/toyqa/dataloader.py index e863c84..5091898 100644 --- a/commands/skillopt/examples/toyqa/dataloader.py +++ b/commands/skillopt/examples/toyqa/dataloader.py @@ -8,16 +8,24 @@ from skillopt.datasets.base import BaseDataLoader, BatchSpec +# Format-sensitive questions: an EMPTY skill tends to answer in a full sentence +# ("The chemical symbol for gold is Au.") → STRICT exact-match on the last line FAILS. +# That failure is the gradient — the optimizer should learn a "reply with ONLY the +# value, no sentence" rule, after which the held-out eval score rises (an accepted edit). _TRAIN = [ - {"id": "t1", "question": "What is the capital of France?", "answer": "Paris"}, - {"id": "t2", "question": "2+2 = ?", "answer": "4"}, - {"id": "t3", "question": "What color is a clear daytime sky?", "answer": "blue"}, - {"id": "t4", "question": "Who wrote Romeo and Juliet? Surname.", "answer": "Shakespeare"}, + {"id": "t1", "question": "What is the chemical symbol for gold?", "answer": "Au"}, + {"id": "t2", "question": "How many sides does a hexagon have?", "answer": "6"}, + {"id": "t3", "question": "What is the past tense of the verb 'run'?", "answer": "ran"}, + {"id": "t4", "question": "Round 3.14159 to two decimal places.", "answer": "3.14"}, + {"id": "t5", "question": "What is 7 times 8?", "answer": "56"}, + {"id": "t6", "question": "Give the ISO two-letter country code for Germany.", "answer": "DE"}, ] _EVAL = [ - {"id": "e1", "question": "What is the capital of Japan?", "answer": "Tokyo"}, - {"id": "e2", "question": "3+5 = ?", "answer": "8"}, - {"id": "e3", "question": "Largest planet in our solar system?", "answer": "Jupiter"}, + {"id": "e1", "question": "What is the chemical symbol for sodium?", "answer": "Na"}, + {"id": "e2", "question": "How many sides does a pentagon have?", "answer": "5"}, + {"id": "e3", "question": "What is the past tense of the verb 'go'?", "answer": "went"}, + {"id": "e4", "question": "Give the ISO two-letter country code for France.", "answer": "FR"}, + {"id": "e5", "question": "What is 9 times 6?", "answer": "54"}, ]