From d4017642f4e6ce5dec162ae61e9e62e74cf79891 Mon Sep 17 00:00:00 2001
From: dancinlife <mk55911@proton.me>
Date: Sun, 7 Jun 2026 06:17:07 +0900
Subject: [PATCH] =?UTF-8?q?feat(domain):=200.11.0=20=E2=80=94=20unify=20sk?=
 =?UTF-8?q?ill-train=20under=20/domain;=20retire=20skillopt=20+=20skillopt?=
 =?UTF-8?q?-hook?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fold the skill-document optimizer into the domain plugin as native ML
subverbs, dropping the pip + separate-plugin dependency.

- new ML subverbs: /domain doctor · train [--bg] · consume ·
  activate/deactivate · status/log
- bin/_domain_ml.py: ~120-line pip-free SkillOpt loop (rollout → reflect →
  edit → held-out gate) backed by `claude -p` (subscription, not metered)
- bin/_domain_ml.sh: ML subverb dispatcher; domain.md routes ML verbs here,
  all existing domain verbs stay on _domain.hexa unchanged
- examples/toyqa: 6 train + 5 eval format-sensitive QA items proving the loop
  end-to-end, no pip
- HARD INVARIANT: only a SEPARATE skill.md is optimized — the domain's own
  DOMAIN.md / verdicts / log are never auto-edited (B-separation)
- retire commands/skillopt (0.5.0) + hooks/skillopt-hook (0.1.0): dirs removed,
  marketplace entries deleted; all capability absorbed into /domain
- g22 lockstep: plugin 0.11.0 ↔ marketplace 0.11.0 + CHANGELOG, same commit

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .claude-plugin/marketplace.json               |  16 +-
 CHANGELOG.md                                  |  20 +++
 commands/skillopt/.claude-plugin/plugin.json  |  19 ---
 commands/skillopt/README.md                   |  76 ----------
 commands/skillopt/bin/skillopt.sh             | 142 ------------------
 commands/skillopt/bin/skillopt_run.py         |  72 ---------
 commands/skillopt/commands/skillopt.md        |  37 -----
 commands/skillopt/examples/README.md          |  54 -------
 commands/skillopt/examples/_base/default.yaml | 100 ------------
 commands/skillopt/examples/toyqa/__init__.py  |   0
 commands/skillopt/examples/toyqa/adapter.py   |  95 ------------
 commands/skillopt/examples/toyqa/config.yaml  |  45 ------
 .../skillopt/examples/toyqa/dataloader.py     |  53 -------
 .../skillopt/examples/toyqa/skills/initial.md |   2 -
 .../skillopt-hook/.claude-plugin/plugin.json  |   9 --
 hooks/skillopt-hook/README.md                 |  39 -----
 hooks/skillopt-hook/bin/skillopt_inject.sh    |  37 -----
 hooks/skillopt-hook/hooks/hooks.json          |   7 -
 skills/domain/.claude-plugin/plugin.json      |   4 +-
 skills/domain/bin/_domain_ml.py               | 112 ++++++++++++++
 skills/domain/bin/_domain_ml.sh               |  76 ++++++++++
 skills/domain/commands/domain.md              |  20 ++-
 skills/domain/examples/toyqa/eval.jsonl       |   5 +
 skills/domain/examples/toyqa/skill.md         |   2 +
 skills/domain/examples/toyqa/train.jsonl      |   6 +
 25 files changed, 239 insertions(+), 809 deletions(-)
 delete mode 100644 commands/skillopt/.claude-plugin/plugin.json
 delete mode 100644 commands/skillopt/README.md
 delete mode 100755 commands/skillopt/bin/skillopt.sh
 delete mode 100755 commands/skillopt/bin/skillopt_run.py
 delete mode 100644 commands/skillopt/commands/skillopt.md
 delete mode 100644 commands/skillopt/examples/README.md
 delete mode 100644 commands/skillopt/examples/_base/default.yaml
 delete mode 100644 commands/skillopt/examples/toyqa/__init__.py
 delete mode 100644 commands/skillopt/examples/toyqa/adapter.py
 delete mode 100644 commands/skillopt/examples/toyqa/config.yaml
 delete mode 100644 commands/skillopt/examples/toyqa/dataloader.py
 delete mode 100644 commands/skillopt/examples/toyqa/skills/initial.md
 delete mode 100644 hooks/skillopt-hook/.claude-plugin/plugin.json
 delete mode 100644 hooks/skillopt-hook/README.md
 delete mode 100755 hooks/skillopt-hook/bin/skillopt_inject.sh
 delete mode 100644 hooks/skillopt-hook/hooks/hooks.json
 create mode 100755 skills/domain/bin/_domain_ml.py
 create mode 100755 skills/domain/bin/_domain_ml.sh
 create mode 100644 skills/domain/examples/toyqa/eval.jsonl
 create mode 100644 skills/domain/examples/toyqa/skill.md
 create mode 100644 skills/domain/examples/toyqa/train.jsonl

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 5512a865..906f016e 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -401,8 +401,8 @@
     {
       "name": "domain",
       "source": "./skills/domain",
-      "description": "0.10.1 (2026-06-03) `_is_name` now accepts a DIGIT-LEADING NAME — first char may be an UPPERCASE letter OR a digit 0-9, so `/domain init 8VERB` works (previously rejected: digit-leading start); subsequent chars unchanged (UPPERCASE/digit/`-`/`+`). 0.10.0 — bare `/domain` + `/domain list` now LINT for scattered report-like `.md` (a basename token audit|report|verdict|gap|finding|impl OR an ISO date like `2026-06-02`) sitting loose at the repo root or under `docs/` WHILE a domain is active, and emit a `⚠ 분리보관 감지` advisory naming each stray with its `/domain absorb <file>` runbook (excludes the domain's own pair / CHANGELOG / README / `.log.md` / already-absorbed pointer files; read-only). 0.9.0 — new `absorb <file> [--state]` verb enforces combined-storage (합산보관) over scattered docs/*.md: folds an existing separate report/finding `.md` into the ACTIVE domain's single doc pair (default → <NAME>.log.md as a dated `## <ISO> — absorbed <rel>` entry embedding the body; `--state` → <NAME>.md snapshot), then replaces the source with a one-line `<!-- absorbed into … -->` pointer (idempotent — an already-absorbed pointer file is skipped; refuses to absorb the domain's own pair). Resolves the active domain via the same _get_active / _snap_path / _log_path logic the other verbs use. Maintain UPPERCASE <NAME>.md (snapshot = final-goal milestone checkboxes) + sister <NAME>.log.md (append-only step log) at project root — auto-scaffolds both, defaults NAME to uppercase basename of git root. 
Verbs: /domain <NAME> (SELECT the session's active domain → later verbs default to it + show a final-goal progress bar `▓▓▓░░ NN% · done/total` from the snapshot's checkboxes) · /domain (show active) · /domain list (alias ls — repo-wide index; with a DOMAINS.tape roster it is AUTHORITATIVE: tables each registered domain ★ = active · @goal · progress · location, progress/@goal stay DERIVED so the checked-in roster holds only NAME→path and never churns; flags ghosts + unregistered; no roster → legacy disk-scan) · /domain list --sync (reconcile DOMAINS.tape with disk — bootstraps the roster) · /domain <task> (append `- [x]` to log) · /domain todo <task> (`- [ ]`) · /domain done <match> (flip [ ]→[x]) · /domain title <text> (alias subtitle — OPTIONAL `@title:` display header: icon · name · alias, e.g. `🧠 IIT4 — 의식 측정자(尺)`, rendered in place of plain `active domain: NAME`; absence keeps current output, no lint warning) · /domain new <header>. Progress is final-goal-based (snapshot completion), not log-based — it doesn't drift as the log grows. Path resolution: a DOMAINS.tape roster row (`@domain NAME := \"path\"`) wins FIRST, so a domain can live at ANY path (e.g. `domains/RUNTIME/RUNTIME.md`); absent a roster entry it falls back to root → folder-nested → root-default-scaffold (log follows the snapshot's dir), so folder-nested meta-domains no longer regenerate an empty root scaffold each call. The folder-nested fallback only matches when the nested file's first line is the domain header `# <NAME>` (guards against false-matching an unrelated same-named file like a digest on case-insensitive filesystems). 
0.8.8: /domain set now SELF-HEALS the DOMAINS.tape roster (auto-registers an on-disk-but-unregistered domain — the gap that ghosted anima's LIFE) and warns on a stale SSOT shadow (the working-tree <NAME>.md is UNTRACKED or BEHIND origin/main → progress/closure may read a stale local copy; reference_domain_init_untracked_ssot); a perpetual `@goal` (no-termination marker like `종료 조건 없음`/`perpetual`/`open horizon`) renders a ♾️ perpetual badge so the progress bar reads as frontier-depletion, NOT a 100% completion countdown (per feedback-closure-is-physical-limit).",
-      "version": "0.10.1"
+      "description": "0.11.0 (2026-06-07) ML subverbs unify the retired `skillopt` + `skillopt-hook` plugins under `/domain`: `/domain doctor` (skill-train readiness) · `/domain train [--bg]` (NATIVE pip-free SkillOpt loop — rollout → reflect → edit → held-out gate via `claude -p` = your Claude Code subscription, NOT a metered API; only a SEPARATE skill.md is optimized — the domain's own DOMAIN.md / verdicts / log are NEVER touched) · `consume <skill.md>` · `activate`/`deactivate` (write ~/.sidecar/domain-skill/active-skill.md) · `status`/`log`. Bundled `examples/toyqa` (6 format-sensitive QA items) proves the loop end-to-end with NO external data/pip. ML verbs route to `bin/_domain_ml.sh` (+ `bin/_domain_ml.py` engine); ALL existing domain verbs route to `_domain.hexa` UNCHANGED. 0.10.1 (2026-06-03) `_is_name` now accepts a DIGIT-LEADING NAME — first char may be an UPPERCASE letter OR a digit 0-9, so `/domain init 8VERB` works (previously rejected: digit-leading start); subsequent chars unchanged (UPPERCASE/digit/`-`/`+`). 0.10.0 — bare `/domain` + `/domain list` now LINT for scattered report-like `.md` (a basename token audit|report|verdict|gap|finding|impl OR an ISO date like `2026-06-02`) sitting loose at the repo root or under `docs/` WHILE a domain is active, and emit a `⚠ 분리보관 감지` advisory naming each stray with its `/domain absorb <file>` runbook (excludes the domain's own pair / CHANGELOG / README / `.log.md` / already-absorbed pointer files; read-only). 0.9.0 — new `absorb <file> [--state]` verb enforces combined-storage (합산보관) over scattered docs/*.md: folds an existing separate report/finding `.md` into the ACTIVE domain's single doc pair (default → <NAME>.log.md as a dated `## <ISO> — absorbed <rel>` entry embedding the body; `--state` → <NAME>.md snapshot), then replaces the source with a one-line `<!-- absorbed into … -->` pointer (idempotent — an already-absorbed pointer file is skipped; refuses to absorb the domain's own pair). 
Resolves the active domain via the same _get_active / _snap_path / _log_path logic the other verbs use. Maintain UPPERCASE <NAME>.md (snapshot = final-goal milestone checkboxes) + sister <NAME>.log.md (append-only step log) at project root — auto-scaffolds both, defaults NAME to uppercase basename of git root. Verbs: /domain <NAME> (SELECT the session's active domain → later verbs default to it + show a final-goal progress bar `▓▓▓░░ NN% · done/total` from the snapshot's checkboxes) · /domain (show active) · /domain list (alias ls — repo-wide index; with a DOMAINS.tape roster it is AUTHORITATIVE: tables each registered domain ★ = active · @goal · progress · location, progress/@goal stay DERIVED so the checked-in roster holds only NAME→path and never churns; flags ghosts + unregistered; no roster → legacy disk-scan) · /domain list --sync (reconcile DOMAINS.tape with disk — bootstraps the roster) · /domain <task> (append `- [x]` to log) · /domain todo <task> (`- [ ]`) · /domain done <match> (flip [ ]→[x]) · /domain title <text> (alias subtitle — OPTIONAL `@title:` display header: icon · name · alias, e.g. `🧠 IIT4 — 의식 측정자(尺)`, rendered in place of plain `active domain: NAME`; absence keeps current output, no lint warning) · /domain new <header>. Progress is final-goal-based (snapshot completion), not log-based — it doesn't drift as the log grows. Path resolution: a DOMAINS.tape roster row (`@domain NAME := \"path\"`) wins FIRST, so a domain can live at ANY path (e.g. `domains/RUNTIME/RUNTIME.md`); absent a roster entry it falls back to root → folder-nested → root-default-scaffold (log follows the snapshot's dir), so folder-nested meta-domains no longer regenerate an empty root scaffold each call. The folder-nested fallback only matches when the nested file's first line is the domain header `# <NAME>` (guards against false-matching an unrelated same-named file like a digest on case-insensitive filesystems). 
0.8.8: /domain set now SELF-HEALS the DOMAINS.tape roster (auto-registers an on-disk-but-unregistered domain — the gap that ghosted anima's LIFE) and warns on a stale SSOT shadow (the working-tree <NAME>.md is UNTRACKED or BEHIND origin/main → progress/closure may read a stale local copy; reference_domain_init_untracked_ssot); a perpetual `@goal` (no-termination marker like `종료 조건 없음`/`perpetual`/`open horizon`) renders a ♾️ perpetual badge so the progress bar reads as frontier-depletion, NOT a 100% completion countdown (per feedback-closure-is-physical-limit).",
+      "version": "0.11.0"
     },
     {
       "name": "domain-doc-guard",
@@ -434,18 +434,6 @@
       "description": "/gap — multi-axis gap exploration. 42 breakthrough-strategy lenses · 8 families (F4 + F6 each have 6 lenses; `occams-razor` lives in both — hypothesis side and design side). Bare /gap = inline-triage + deep-dive only hot families. /gap full = exhaustive 8-subagent fan-out. /gap <scope> targets the sweep. /gap list prints the catalogue. Surfaces + prioritises gaps; never fixes.",
       "version": "0.2.0"
     },
-    {
-      "name": "skillopt",
-      "source": "./commands/skillopt",
-      "description": "0.5.0 (2026-06-07) AUTO-USE (activate) — `/skillopt activate <skill.md>` copies a trained skill to the SSOT ~/.sidecar/skillopt/active-skill.md so the companion `skillopt-hook` auto-injects it at every SessionStart (no command needed); `/skillopt deactivate` removes it, `/skillopt agent-active on|off` toggles a one-line nudge that tells the agent to PROPOSE (never auto-run) `/skillopt train` for repeatable scored tasks. The USE-vs-TRAIN split: USE is automatic + opt-in + cheap; TRAIN stays a command/agent decision (cost-bearing). 0.4.0 (2026-06-07) BACKGROUND TRAIN + HARDER EXAMPLE — `/skillopt train --bg` detaches the run (nohup → log under ~/.sidecar/skillopt/), returns immediately; `/skillopt status` shows running-state + score/step progress, `/skillopt log` tails. The bundled `examples/toyqa` dataset swapped to 6 format-sensitive QA items (chemical symbols, ISO codes, rounding…) that an EMPTY skill answers in a full sentence → STRICT exact-match fails → a real learning gradient (the optimizer learns a 'reply with only the value' rule, then the held-out gate rises); train_size 6 · batch 3 · sel 5 for stronger signal. 0.3.0 (2026-06-07) EXECUTABLE CLI — `/skillopt` now runs directly via a `!`-exec dispatcher (`bin/skillopt.sh`, prefs-style: resolves its own cached install dir, so the user never types a long path). `/skillopt train` runs the loop in one token; `/skillopt doctor|ckpts|consume <skill.md>|help` all dispatch. Honest 0-edit runs reported as such; claude -p clarified = subscription (NOT metered). 0.2.0 (2026-06-07) sidecar-OWNED env adapter — the only domain code (run a task + score it) now ships IN the plugin at `commands/skillopt/examples/<domain>/`, NOT a clone of the upstream package; `bin/skillopt_run.py` injects sidecar adapters into the upstream hard-coded `_ENV_REGISTRY` at runtime (additive · survives `_register_builtins`) then runs the upstream trainer. 
Bundled reference `examples/toyqa/` = 4-item exact-match QA proving the loop runs end-to-end on local `claude -p` (no API key, no external data — target+optimizer both shell out to the Claude Code CLI = subscription, NOT metered); `examples/_base/default.yaml` vendors the upstream base config so examples are self-contained (plain `pip install skillopt` ships no `configs/`). Verified: baseline→rollout→reflect(on failures)→gate→test execute against real claude calls; an edit lands only when the optimizer judges a failure generalizable AND the held-out gate improves (no forced edit). 0.1.0 (2026-06-07) initial — /skillopt drives SkillOpt (microsoft/SkillOpt · `pip install skillopt`, arXiv:2605.23904), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (DL analogy: skill.md = weights · rollout = forward · reflect = backprop · gate = validation early-stop). Subverbs — doctor (pip pkg + claude CLI + harness wiring readiness) · ckpts (list the package's bundled pretrained skill.md artifacts) · consume <skill.md> (load a trained skill into THIS session as additive system guidance) · train <config.yaml> (run the loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec · CLAUDE_SETTING_SOURCES=user,project so the sidecar tapes ride along as FIXED scaffolding; refuses to run without a real scoring env adapter — no fabricated scores) · help. HARD INVARIANT: only the skill.md is optimized — the model, the governance tapes (= SkillOpt's fixed prompts/*_system.md), and every *-guard safety hook stay UNCHANGED; the trained skill is a SEPARATE per-domain file injected via --append-system-prompt; /skillopt never edits a .tape or a guard (a held-out UTILITY gate is not a SAFETY check — kept orthogonal). Wraps the upstream pip CLI; does not vendor or fork it. Companion: microsoft/SkillLens (arXiv:2605.23899).",
-      "version": "0.5.0"
-    },
-    {
-      "name": "skillopt-hook",
-      "source": "./hooks/skillopt-hook",
-      "description": "0.1.0 (2026-06-07) SessionStart hook — auto-USES a trained skill without a command. If the user activated a learned skill (`/skillopt activate <skill.md>` → ~/.sidecar/skillopt/active-skill.md), injects it as additionalContext at session start so the agent applies it automatically (prefs-hook split: the command writes the SSOT, the hook auto-injects). OPT-IN + safe: silent when nothing is activated; NEVER trains (training is cost-bearing — stays a command/agent decision); always exits 0 (fail-open). Optional `~/.sidecar/skillopt/agent-active` marker adds a one-line nudge to PROPOSE (never auto-run) `/skillopt train` for repeatable auto-scorable tasks. Hook half of the skillopt USE-vs-TRAIN split; the `skillopt` command plugin is the TRAIN/activate half. NO env opt-out — the SSOT files are the switch.",
-      "version": "0.1.0"
-    },
     {
       "name": "sidecar",
       "source": "./commands/sidecar",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f09f11ad..87b5a868 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,26 @@ For the full audit trail, see `git log`.
 
 ---
 
+## 2026-06-07 — 🎓 domain 0.11.0 — `/domain train` 으로 통일 · skillopt + skillopt-hook 폐기
+
+스킬 학습 능력을 별도 플러그인 대신 **`/domain` 하나로 통일**. 네이티브 · pip 무의존.
+
+- 🎓 **domain 0.11.0** — ML 서브버브 추가: `/domain doctor`(학습 준비 점검) ·
+  `/domain train [--bg]`(네이티브 pip-free SkillOpt 루프 — rollout → reflect → edit →
+  held-out gate, 백엔드 `claude -p` = 구독, 과금 API 아님) · `consume <skill.md>` ·
+  `activate`/`deactivate`(`~/.sidecar/domain-skill/active-skill.md`) · `status`/`log`.
+  ML 서브버브는 `bin/_domain_ml.sh`(+ `bin/_domain_ml.py` ~120줄 엔진)로 라우팅,
+  기존 도메인 버브는 `_domain.hexa` 로 **그대로** 유지.
+- 🔒 **HARD INVARIANT** — 학습은 **별도 skill.md 만** 최적화. 도메인 자신의
+  `DOMAIN.md` / verdicts / 로그는 절대 자동 편집 안 함(B 분리).
+- 📦 번들 `examples/toyqa`(format-sensitive QA 6 + 5) — 외부 데이터/pip 없이
+  baseline→rollout→reflect→gate 루프가 end-to-end 도는 걸 증명.
+- 🗑 **skillopt(0.5.0) · skillopt-hook(0.1.0) 폐기** — 두 디렉터리 제거,
+  marketplace 엔트리 삭제. 모든 능력이 `/domain` 으로 흡수됨(USE=activate/hook 대체는
+  domain activate + SessionStart, TRAIN=domain train).
+- 검증: `/domain doctor` smoke(claude CLI + 네이티브 엔진 감지) · py/sh 문법 · JSON 유효 ·
+  g22 lockstep(plugin 0.11.0 ↔ marketplace 0.11.0) + CHANGELOG 동일 커밋.
+
 ## 2026-06-07 — 🎓 skillopt 0.5.0 + skillopt-hook 0.1.0 — 자동 사용(하이브리드)
 
 "명령어 안 치고 자동으로" + "에이전트 적극 활용"을 USE-vs-TRAIN 분리로 구현.
diff --git a/commands/skillopt/.claude-plugin/plugin.json b/commands/skillopt/.claude-plugin/plugin.json
deleted file mode 100644
index a976d69a..00000000
--- a/commands/skillopt/.claude-plugin/plugin.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-  "name": "skillopt",
-  "description": "/skillopt — drive SkillOpt (microsoft/SkillOpt · `pip install skillopt`), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (the DL analogy: the skill.md is the 'weights', rollout=forward, reflect=backprop, gate=validation early-stop). Subverbs — doctor (install + readiness check: pip pkg + claude CLI) · ckpts (list the bundled pretrained skill.md artifacts) · consume <skill.md> (load a trained skill into THIS session as system guidance) · train <config.yaml> (run the optimization loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec, CLAUDE_SETTING_SOURCES=user,project; needs a scoring env adapter) · help. ONLY the skill.md changes — the model, the sidecar governance tapes (= SkillOpt's fixed prompts/*_system.md scaffolding), and the *-guard safety hooks all stay fixed. Never auto-edits governance; it produces a SEPARATE per-domain skill.md the harness injects via --append-system-prompt. Wraps the upstream pip CLI; does not vendor it.",
-  "version": "0.5.0",
-  "author": {
-    "name": "dancinlab"
-  },
-  "repository": "https://github.com/dancinlab/sidecar",
-  "license": "MIT",
-  "keywords": [
-    "claude-code",
-    "command",
-    "skillopt",
-    "skill-optimization",
-    "self-improving-agent",
-    "prompt-optimization",
-    "text-space-optimizer"
-  ]
-}
diff --git a/commands/skillopt/README.md b/commands/skillopt/README.md
deleted file mode 100644
index 2cb26896..00000000
--- a/commands/skillopt/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# skillopt
-
-`/skillopt` — sidecar driver for **SkillOpt** (`microsoft/SkillOpt` ·
-`pip install skillopt` · arXiv:2605.23904), a text-space optimizer that trains a
-natural-language **skill document** for a *frozen* Claude Code agent.
-
-## What it is
-
-SkillOpt is the deep-learning loop applied to a markdown sheet instead of weights:
-
-| Deep learning | SkillOpt |
-|---|---|
-| model weights | `skill.md` (the trainable doc) |
-| forward pass | rollout (run the task under the skill) |
-| loss | task evaluator (your scoring) |
-| backprop | reflect (failures → edit patches) |
-| gradient | edit patch (`append`/`insert_after`/`replace`/`delete`) |
-| learning rate | max edits per step |
-| validation / early-stop | held-out **gate** — accept an edit only if the score rises |
-
-```
-[ rollout ] ─▶ [ reflect ] ─▶ [ edit ] ─▶ [ gate ] ─▶ best_skill.md
-     ▲                                                    │
-     └──────── accepted skill feeds the next rollout ◀────┘
-```
-
-## The one hard invariant
-
-Only the `skill.md` changes. The model, the sidecar **governance tapes**
-(commons / recommend / easy — these are SkillOpt's *fixed* `prompts/*_system.md`
-scaffolding, loaded via `--setting-sources user,project`), and every `*-guard`
-safety hook stay UNCHANGED. The trained skill is a **separate per-domain file**
-the harness injects via `--append-system-prompt`. `/skillopt` never edits a tape
-or a guard — a held-out *utility* gate is not a *safety* check, and the two are
-kept orthogonal.
-
-## Subverbs
-
-```
-/skillopt                      readiness check (= doctor)
-/skillopt doctor               pip pkg + claude CLI + harness wiring
-/skillopt ckpts                list bundled pretrained skill.md artifacts
-/skillopt consume <skill.md>   load a trained skill as THIS session's guidance
-/skillopt train <config.yaml>  run rollout→reflect→edit→gate (needs a scoring adapter)
-/skillopt help                 usage
-```
-
-## Requirements
-
-- `pip install skillopt` (the upstream package — `/skillopt` wraps its CLI, does not vendor it).
-- `claude` CLI on PATH (SkillOpt's claude backend shells out to it).
-- For `train`: a scoring **env adapter** (`skillopt/envs/<domain>/`) whose
-  `rollout()` returns `{id, hard:0|1, soft:float}` — the only domain-specific
-  code; everything else (optimizer, gate, reflect) is provided by the package.
-
-## Harness wiring
-
-`train` exports the Claude Code target backend:
-
-```bash
-export TARGET_BACKEND=claude_code_exec
-export CLAUDE_SETTING_SOURCES=user,project   # your sidecar tapes ride along as fixed scaffolding
-```
-
-So a SkillOpt rollout runs *through your own Claude Code harness with your
-settings* — the governance tapes are present as fixed scaffolding while only the
-skill document is being optimized.
-
-## Honesty
-
-- No fabricated scores: `train` refuses to run without a real scoring adapter.
-- Reported improvements are only those the held-out gate accepted.
-- Cost-bearing runs route to the SANDBOX substrate per the active project's rules.
-
-Companion study: `microsoft/SkillLens` (arXiv:2605.23899) — the lifecycle
-diagnosis (skills help ~75% / hurt ~25%; extraction ≠ execution).
diff --git a/commands/skillopt/bin/skillopt.sh b/commands/skillopt/bin/skillopt.sh
deleted file mode 100755
index 588ca355..00000000
--- a/commands/skillopt/bin/skillopt.sh
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/env bash
-# skillopt — sidecar CLI dispatcher for the /skillopt command.
-# Subverbs: doctor (default) · ckpts · consume <skill.md> · train [--config X ...] · help
-# Resolves its own install dir, so the user never types a long path.
-set -uo pipefail
-
-HERE="$(cd "$(dirname "$0")" && pwd)"   # .../skillopt/<ver>/bin
-PY="${SKILLOPT_PY:-python3}"            # override with SKILLOPT_PY=/path/to/venv/bin/python
-
-_have_pkg() { "$PY" -c "import skillopt" >/dev/null 2>&1; }
-_pkg_ver()  { "$PY" -c "import importlib.metadata as m; print(m.version('skillopt'))" 2>/dev/null; }
-
-doctor() {
-  echo "🎓 /skillopt doctor — readiness"
-  if _have_pkg; then echo "  ✅ skillopt: $(_pkg_ver)"
-  else echo "  ⚠ skillopt: NOT installed → pip install skillopt"; fi
-  if command -v claude >/dev/null 2>&1; then echo "  ✅ claude CLI: $(claude --version 2>/dev/null || echo present)"
-  else echo "  ⚠ claude CLI: MISSING (the claude_chat backend shells out to 'claude -p')"; fi
-  echo "  • backend: claude_chat (claude -p = your Claude Code subscription · NOT a metered API)"
-  echo "  • python : $PY  (override via SKILLOPT_PY=/path/to/venv/bin/python)"
-  echo "  • run    : /skillopt train         (bundled examples/toyqa, 1 epoch)"
-}
-
-ckpts() {
-  _have_pkg || { echo "skillopt not installed — /skillopt doctor"; return 1; }
-  local root; root="$("$PY" -c "import skillopt,os;print(os.path.dirname(os.path.dirname(skillopt.__file__)))" 2>/dev/null)"
-  local found; found="$(find "$root" -path '*/ckpt/*' -name '*.md' 2>/dev/null | sed "s#$root/##")"
-  if [ -n "$found" ]; then echo "$found"
-  else echo "(no bundled ckpt skills — the pip package ships code only; see github.com/microsoft/SkillOpt/tree/main/ckpt)"; fi
-}
-
-consume() {
-  local f="${1:-}"
-  [ -n "$f" ] && [ -f "$f" ] || { echo "usage: /skillopt consume <path/to/skill.md>"; return 1; }
-  echo "===== SKILL DOCUMENT (adopt as this session's task guidance) ====="
-  cat "$f"
-  echo "===== END SKILL ($(grep -cE '^[-*] ' "$f" 2>/dev/null || echo 0) rules) ====="
-}
-
-LOGDIR="${SKILLOPT_LOG:-$HOME/.sidecar/skillopt}"
-
-train() {
-  _have_pkg || { echo "skillopt not installed → pip install skillopt (then /skillopt train)"; return 1; }
-  command -v claude >/dev/null 2>&1 || echo "⚠ claude CLI missing — the run will fail at the first rollout."
-  # Background mode: detach, log to a file, return immediately (watch with /skillopt status).
-  if [ "${1:-}" = "--bg" ] || [ "${1:-}" = "-b" ]; then
-    shift
-    mkdir -p "$LOGDIR"
-    local log="$LOGDIR/train-$(date +%Y%m%d-%H%M%S).log"
-    nohup "$PY" "$HERE/skillopt_run.py" "$@" >"$log" 2>&1 &
-    local pid=$!
-    echo "$pid" > "$LOGDIR/train.pid"
-    echo "🌙 background train started — pid $pid"
-    echo "  log   : $log"
-    echo "  watch : /skillopt status   ·   tail: /skillopt log"
-    return 0
-  fi
-  echo "▶ launching SkillOpt train via the in-plugin launcher (registers sidecar envs)…"
-  exec "$PY" "$HERE/skillopt_run.py" "$@"
-}
-
-_latest_log() { ls -1t "$LOGDIR"/train-*.log 2>/dev/null | head -1; }
-
-status() {
-  local pid="" alive="no"
-  [ -f "$LOGDIR/train.pid" ] && pid="$(cat "$LOGDIR/train.pid" 2>/dev/null)"
-  [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null && alive="yes"
-  local log; log="$(_latest_log)"
-  echo "🌙 /skillopt status"
-  echo "  running : $alive${pid:+ (pid $pid)}"
-  echo "  log     : ${log:-(none — run: /skillopt train --bg)}"
-  [ -n "$log" ] || return 0
-  echo "  ── progress (score / step lines) ──"
-  grep -aE 'selection hard|gate\[|STEP |EPOCH |best skill|new best|Test Results|done\]' "$log" 2>/dev/null | tail -8 | sed 's/^/    /'
-  echo "  ── tail ──"; tail -4 "$log" | sed 's/^/    /'
-}
-
-log_cmd() {
-  local log; log="$(_latest_log)"
-  [ -n "$log" ] && tail -40 "$log" || echo "no train log yet — run: /skillopt train --bg"
-}
-
-SKDIR="${SKILLOPT_HOME:-$HOME/.sidecar/skillopt}"
-
-activate() {
-  local f="${1:-}"
-  [ -n "$f" ] && [ -f "$f" ] || { echo "usage: /skillopt activate <path/to/skill.md>"; return 1; }
-  mkdir -p "$SKDIR"; cp "$f" "$SKDIR/active-skill.md"
-  echo "✅ activated — skillopt-hook will auto-inject this skill at session start."
-  echo "  source : $f"
-  echo "  active : $SKDIR/active-skill.md   ·   off: /skillopt deactivate"
-}
-
-deactivate() {
-  rm -f "$SKDIR/active-skill.md"
-  echo "✅ deactivated — no skill is auto-injected (active-skill.md removed)."
-}
-
-agent_active() {
-  case "${1:-}" in
-    on)  mkdir -p "$SKDIR"; : > "$SKDIR/agent-active"; echo "✅ agent-active ON — sessions nudge the agent to PROPOSE /skillopt train for scored tasks." ;;
-    off) rm -f "$SKDIR/agent-active"; echo "✅ agent-active OFF — no train-proposal nudge." ;;
-    *)   [ -f "$SKDIR/agent-active" ] && echo "agent-active: ON" || echo "agent-active: OFF"; echo "usage: /skillopt agent-active on|off" ;;
-  esac
-}
-
-usage() {
-  cat <<'EOF'
-/skillopt — train a skill document for a frozen Claude Code agent (SkillOpt driver)
-
-  /skillopt                      readiness check (= doctor)
-  /skillopt doctor               pip pkg + claude CLI + wiring
-  /skillopt ckpts                list bundled pretrained skill.md
-  /skillopt consume <skill.md>   print a trained skill (agent adopts it this session)
-  /skillopt activate <skill.md>  auto-USE a skill every session (skillopt-hook injects it)
-  /skillopt deactivate           stop auto-using (remove the active skill)
-  /skillopt agent-active on|off  toggle the "propose train" agent nudge
-  /skillopt train [--config X]   run rollout→reflect→edit→gate (default: examples/toyqa)
-  /skillopt train --bg           run in the background (returns immediately)
-  /skillopt status               background run state + score/step progress
-  /skillopt log                  tail the latest train log
-  /skillopt help                 this text
-
-claude -p = your Claude Code subscription (NO metered API cost). pip install skillopt first.
-EOF
-}
-
-sub="${1:-doctor}"; shift 2>/dev/null || true
-case "$sub" in
-  ""|doctor)        doctor ;;
-  ckpts)            ckpts ;;
-  consume)          consume "$@" ;;
-  train)            train "$@" ;;
-  status)           status ;;
-  log)              log_cmd ;;
-  activate)         activate "$@" ;;
-  deactivate)       deactivate ;;
-  agent-active)     agent_active "$@" ;;
-  help|-h|--help)   usage ;;
-  *.md)             consume "$sub" ;;          # bare path → consume
-  *)                echo "unknown subverb: $sub"; echo; usage; exit 2 ;;
-esac
diff --git a/commands/skillopt/bin/skillopt_run.py b/commands/skillopt/bin/skillopt_run.py
deleted file mode 100755
index 50e8e68e..00000000
--- a/commands/skillopt/bin/skillopt_run.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-"""skillopt_run — register sidecar-shipped env adapters, then run SkillOpt training.
-
-Upstream `scripts.train._ENV_REGISTRY` is hard-coded to the package's built-in
-benchmarks (no plugin path), so this launcher injects sidecar's own adapters into
-that registry at runtime (the injection survives because `_register_builtins()`
-is purely additive — it never clears the registry), then hands off to the
-upstream trainer.
-
-Usage:
-    python commands/skillopt/bin/skillopt_run.py            # default: examples/toyqa
-    python commands/skillopt/bin/skillopt_run.py --config <path.yaml> [trainer args...]
-
-Requires `pip install skillopt` and the `claude` CLI on PATH (the toyqa example
-config routes both target + optimizer through `claude -p`).
-"""
-from __future__ import annotations
-
-import os
-import sys
-
-_BIN = os.path.dirname(os.path.abspath(__file__))
-_PLUGIN = os.path.dirname(_BIN)                       # commands/skillopt
-_EXAMPLES = os.path.join(_PLUGIN, "examples")
-
-# Make the sidecar example packages importable (e.g. `import toyqa.adapter`).
-sys.path.insert(0, _EXAMPLES)
-
-try:
-    import scripts.train as trainer  # provided by `pip install skillopt`
-except ModuleNotFoundError:
-    sys.exit("skillopt not installed — run `pip install skillopt` (see /skillopt doctor).")
-
-
-def _register_sidecar_envs() -> list[str]:
-    """Inject every sidecar example adapter into the upstream env registry."""
-    registered: list[str] = []
-    try:
-        from toyqa.adapter import ToyQAAdapter
-        trainer._ENV_REGISTRY["toyqa"] = ToyQAAdapter
-        registered.append("toyqa")
-    except Exception as exc:  # noqa: BLE001 — report, don't abort other envs
-        print(f"[skillopt_run] toyqa registration skipped: {exc}", file=sys.stderr)
-    return registered
-
-
-def main() -> None:
-    registered = _register_sidecar_envs()
-    print(f"[skillopt_run] sidecar envs registered: {registered or '(none)'}", file=sys.stderr)
-
-    # Default to the bundled toyqa example when no --config is given.
-    if "--config" not in sys.argv:
-        sys.argv += ["--config", os.path.join(_EXAMPLES, "toyqa", "config.yaml")]
-
-    # Resolve the example's relative `skill_init` (skills/initial.md) against its dir.
-    cfg_idx = sys.argv.index("--config")
-    cfg_path = sys.argv[cfg_idx + 1]
-    cfg_dir = os.path.dirname(os.path.abspath(cfg_path))
-    os.chdir(cfg_dir)
-    # Re-point --config to a basename now that we are in its dir (keeps _base_ relative resolution).
-    sys.argv[cfg_idx + 1] = os.path.basename(cfg_path)
-
-    entry = getattr(trainer, "main", None)
-    if callable(entry):
-        entry()
-    else:  # fallback: execute the module as __main__
-        import runpy
-        runpy.run_module("scripts.train", run_name="__main__", alter_sys=True)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/commands/skillopt/commands/skillopt.md b/commands/skillopt/commands/skillopt.md
deleted file mode 100644
index c0e593aa..00000000
--- a/commands/skillopt/commands/skillopt.md
+++ /dev/null
@@ -1,37 +0,0 @@
----
-description: /skillopt — train a natural-language skill document for a frozen Claude Code agent via SkillOpt (microsoft/SkillOpt · `pip install skillopt`). Executable CLI — subverbs run directly via the in-plugin dispatcher. doctor (pip + claude + wiring readiness) · ckpts (bundled pretrained skill.md) · consume <skill.md> (load a trained skill into THIS session) · train [--config X] (run rollout→reflect→edit→held-out gate; default = bundled examples/toyqa on `claude -p`, no API key) · help. ONLY the skill.md is optimized — model, governance tapes, and *-guard hooks stay fixed; the trained skill is a SEPARATE per-domain file. claude -p = your Claude Code subscription, NOT a metered API.
-argument-hint: "[doctor | ckpts | consume <skill.md> | train [--config X] | help]"
-allowed-tools: Bash, Read
----
-
-# /skillopt — SkillOpt driver (executable CLI)
-
-Running the dispatcher with your arguments:
-
-!`V="$(ls -1 "$HOME/.claude/plugins/cache/sidecar/skillopt" 2>/dev/null | sort -V | tail -1)"; S="$HOME/.claude/plugins/cache/sidecar/skillopt/$V/bin/skillopt.sh"; [ -f "$S" ] && bash "$S" $ARGUMENTS || echo "skillopt not installed in cache — run: sidecar sync"`
-
-## How to read the output above
-
-- **doctor / ckpts / help** — the output is the answer; relay it plainly (note any
-  `pip install skillopt` / `sidecar sync` action it surfaces).
-- **consume** — the printed block IS a trained skill document. **Adopt it as task
-  guidance for THIS session** (additive context, exactly as `--append-system-prompt`
-  would inject it). Confirm in one line which skill + how many rules.
-- **train** — the loop streams baseline → rollout → reflect → gate → test against real
-  `claude -p` calls. On completion report the `best_skill.md` snapshot + its held-out
-  gate delta. An edit lands ONLY when the optimizer judges a failure pattern
-  generalizable AND the gate score rises — **never claim an improvement the gate did
-  not accept** (a 0-edit run is an honest outcome, not a failure).
-- If it printed `pip install skillopt`, that is the user's call — do not auto-install.
-
-## Invariant
-
-Only the `skill.md` is optimized. The model, the sidecar governance tapes (= SkillOpt's
-fixed `prompts/*_system.md` scaffolding), and every `*-guard` safety hook stay
-UNCHANGED — a held-out **utility** gate is not a **safety** check. The trained skill is a
-SEPARATE per-domain file (`examples/<domain>/`); `/skillopt` never edits a `.tape` or a
-guard. Wraps the upstream pip package — does not vendor or fork it.
-
-Make your own scoring adapter: copy `examples/toyqa/` → `examples/<domain>/`, implement
-`rollout()` (run the task, return `{id, hard:0|1, soft:float}`), register it in
-`bin/skillopt_run.py`. Upstream: arXiv:2605.23904. Companion: arXiv:2605.23899 (SkillLens).
diff --git a/commands/skillopt/examples/README.md b/commands/skillopt/examples/README.md
deleted file mode 100644
index 05a4a08c..00000000
--- a/commands/skillopt/examples/README.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# skillopt/examples — sidecar-owned env adapters
-
-The optimizer/trainer/gate live in the upstream `skillopt` pip package. The only
-domain-specific piece — the **env adapter** (run a task + score it) — lives HERE,
-in the plugin, not in a clone of the package.
-
-Upstream `scripts.train._ENV_REGISTRY` is hard-coded to its built-in benchmarks
-(no plugin path), so [`../bin/skillopt_run.py`](../bin/skillopt_run.py) injects
-these adapters into that registry at runtime (additive — survives
-`_register_builtins()`), then hands off to the upstream trainer.
-
-```
-[ pip skillopt ]  optimizer · trainer · gate · reflect      (unchanged)
-       ▲
-       │ runtime register (bin/skillopt_run.py)
-[ sidecar plugin ]  examples/<domain>/  ← the ONLY new code (rollout + score)
-```
-
-## toyqa — reference adapter (end-to-end smoke)
-
-`toyqa/` is a 4-item exact-match QA task that proves the loop runs end-to-end on
-the local Claude Code CLI (`claude -p`, no API key, no external data):
-
-```
-toyqa/
-├─ adapter.py       ToyQAAdapter — rollout (ask claude, STRICT exact-match score) + reflect (delegates)
-├─ dataloader.py    in-memory train/eval items (no split files)
-├─ skills/initial.md   empty seed skill (the thing being optimized)
-└─ config.yaml      claude_chat backend · _base_ → ../_base/default.yaml (vendored)
-```
-
-Run it:
-
-```bash
-pip install skillopt                 # once
-python commands/skillopt/bin/skillopt_run.py    # registers toyqa, runs 1 epoch
-```
-
-Verified: baseline→rollout→reflect(on failures)→gate→test all execute against
-real `claude -p` calls. A skill edit only lands when the optimizer judges a
-failure pattern generalizable AND the held-out gate score improves — honest by
-design (no edit is forced).
-
-## Make your own
-
-Copy `toyqa/` to `examples/<yourdomain>/`, then implement `rollout()` to run your
-task and return `{id, hard:0|1, soft:float}` per item — that's the only required
-code; `reflect()` delegates to the package optimizer. Register it in
-`bin/skillopt_run.py`. Good auto-scorable sidecar candidates: hexa-lang strict
-eval, `/verify` pass-rate, easy-style box/ASCII conformance.
-
-`_base/default.yaml` is a vendored copy of the upstream package default config so
-the examples are self-contained (a plain `pip install skillopt` does not ship
-`configs/`).
diff --git a/commands/skillopt/examples/_base/default.yaml b/commands/skillopt/examples/_base/default.yaml
deleted file mode 100644
index eb2d58da..00000000
--- a/commands/skillopt/examples/_base/default.yaml
+++ /dev/null
@@ -1,100 +0,0 @@
-# SkillOpt default configuration — base for all environments.
-# Environment configs should inherit via: _base_: default.yaml
-
-model:
-  backend: azure_openai
-  optimizer: gpt-5.5
-  target: gpt-5.5
-  optimizer_backend: openai_chat
-  target_backend: openai_chat
-  reasoning_effort: medium
-  rewrite_reasoning_effort: ""
-  rewrite_max_completion_tokens: 64000
-  codex_exec_path: codex
-  codex_exec_sandbox: workspace-write
-  codex_exec_profile: ""
-  codex_exec_full_auto: false
-  codex_exec_reasoning_effort: none
-  codex_exec_use_sdk: auto
-  codex_exec_network_access: false
-  codex_exec_web_search: false
-  codex_exec_approval_policy: never
-  claude_code_exec_path: claude
-  claude_code_exec_profile: ""
-  claude_code_exec_use_sdk: auto
-  claude_code_exec_effort: medium
-  claude_code_exec_max_thinking_tokens: 16384
-  codex_trace_to_optimizer: true
-  azure_openai_endpoint: ""           # e.g. "https://your-resource.openai.azure.com/"
-  azure_openai_api_version: "2024-12-01-preview"
-  azure_openai_api_key: ""       # Fill locally if you do not export AZURE_OPENAI_API_KEY
-  azure_openai_auth_mode: ""           # empty → fall back to AZURE_OPENAI_AUTH_MODE env (default "azure_cli")
-  azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
-  azure_openai_managed_identity_client_id: ""
-  optimizer_azure_openai_endpoint: ""   # e.g. "https://your-resource.openai.azure.com/"
-  optimizer_azure_openai_api_version: "2024-12-01-preview"
-  optimizer_azure_openai_api_key: ""
-  optimizer_azure_openai_auth_mode: ""           # empty → fall back to OPTIMIZER_AZURE_OPENAI_AUTH_MODE env, then shared
-  optimizer_azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
-  optimizer_azure_openai_managed_identity_client_id: ""
-  target_azure_openai_endpoint: ""   # e.g. "https://your-resource.openai.azure.com/"
-  target_azure_openai_api_version: "2024-12-01-preview"
-  target_azure_openai_api_key: ""
-  target_azure_openai_auth_mode: ""           # empty → fall back to TARGET_AZURE_OPENAI_AUTH_MODE env, then shared
-  target_azure_openai_ad_scope: "https://cognitiveservices.azure.com/.default"
-  target_azure_openai_managed_identity_client_id: ""
-
-  # MiniMax backend settings (minimax_chat target)
-  minimax_base_url: ""          # https://api.minimax.io/v1 if blank
-  minimax_api_key: ""
-  minimax_model: "MiniMax-M2.7"
-  minimax_temperature: "0.7"
-  minimax_max_tokens: "8000"
-  minimax_enable_thinking: "false"
-  optimizer_minimax_base_url: ""    # per-role override
-  target_minimax_base_url: ""       # per-role override
-  optimizer_minimax_api_key: ""
-  target_minimax_api_key: ""
-
-train:
-  num_epochs: 4
-  train_size: 0          # 0 = derive from dataset split when available
-  batch_size: 40
-  accumulation: 1
-  seed: 42
-
-gradient:
-  minibatch_size: 8
-  merge_batch_size: 8
-  analyst_workers: 16
-  max_analyst_rounds: 3
-  failure_only: false
-
-optimizer:
-  learning_rate: 4          # max edits per step (edit_budget)
-  min_learning_rate: 2      # min edits for decay schedulers
-  lr_scheduler: cosine      # constant / linear / cosine / autonomous
-  lr_control_mode: fixed    # fixed / autonomous / none
-  skill_update_mode: patch  # patch / rewrite_from_suggestions / full_rewrite_minibatch
-  use_slow_update: true
-  slow_update_samples: 20
-  slow_update_gate_with_selection: false
-  longitudinal_pair_policy: mixed  # mixed / changed / unchanged
-  use_meta_skill: true
-
-evaluation:
-  use_gate: true
-  sel_env_num: 0
-  test_env_num: 0
-  eval_test: true
-
-env:
-  name: ""
-  skill_init: ""
-  split_mode: ratio       # ratio = build deterministic split from data_path; split_dir = use pre-split train/val/test
-  split_seed: 42
-  split_dir: ""
-  data_path: ""
-  split_output_dir: ""
-  exec_timeout: 120      # per target model/code-agent call timeout in seconds
-  out_root: ""
diff --git a/commands/skillopt/examples/toyqa/__init__.py b/commands/skillopt/examples/toyqa/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/commands/skillopt/examples/toyqa/adapter.py b/commands/skillopt/examples/toyqa/adapter.py
deleted file mode 100644
index 49064b29..00000000
--- a/commands/skillopt/examples/toyqa/adapter.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""Toy QA EnvAdapter — minimal real env for an end-to-end SkillOpt loop.
-
-rollout() is the only domain code: ask the target model (skill = system prompt)
-each question, score STRICT exact-match (so a verbose empty-skill answer fails →
-the optimizer has a real gradient to learn a concise-answer rule). reflect()
-delegates to the package optimizer (run_minibatch_reflect).
-"""
-from __future__ import annotations
-
-import os
-import re
-
-from skillopt.envs.base import EnvAdapter
-from skillopt.gradient.reflect import run_minibatch_reflect
-from skillopt.model import chat_target_messages
-
-from .dataloader import ToyQALoader
-
-
-def _norm(s: str) -> str:
-    return re.sub(r"[^a-z0-9 ]", "", (s or "").lower()).strip()
-
-
-def _extract(message) -> str:
-    text = message if isinstance(message, str) else (getattr(message, "content", "") or str(message))
-    return text.strip().splitlines()[-1].strip() if text.strip() else ""
-
-
-class ToyQAAdapter(EnvAdapter):
-    def __init__(self, analyst_workers: int = 2, workers: int = 2,
-                 failure_only: bool = False, minibatch_size: int = 8,
-                 edit_budget: int = 4, max_completion_tokens: int = 1024,
-                 seed: int = 42, limit: int = 0) -> None:
-        self.analyst_workers = analyst_workers
-        self.workers = workers
-        self.failure_only = failure_only
-        self.minibatch_size = minibatch_size
-        self.edit_budget = edit_budget
-        self.max_completion_tokens = int(max_completion_tokens)
-        self.dataloader = ToyQALoader(seed=seed, limit=limit)
-
-    def setup(self, cfg: dict) -> None:
-        super().setup(cfg)
-        self._cfg = cfg
-        self.dataloader.setup(cfg)
-
-    def get_dataloader(self):
-        return self.dataloader
-
-    def build_train_env(self, batch_size: int, seed: int, **kwargs):
-        return list(self.dataloader.build_train_batch(batch_size, seed).payload)
-
-    def build_eval_env(self, env_num: int, split: str, seed: int, **kwargs):
-        return list(self.dataloader.build_eval_batch(env_num, split, seed).payload)
-
-    def rollout(self, env_manager, skill_content: str, out_dir: str, **kwargs) -> list[dict]:
-        system = (skill_content or "").strip()  # NO format hint — the skill must learn it
-        results: list[dict] = []
-        for item in env_manager:
-            messages = [
-                {"role": "system", "content": system},
-                {"role": "user", "content": item["question"]},
-            ]
-            try:
-                message, _ = chat_target_messages(
-                    messages=messages, max_completion_tokens=self.max_completion_tokens)
-                pred = _extract(message)
-            except Exception as exc:  # noqa: BLE001 — record, never crash the loop
-                pred = ""
-                item = {**item, "_err": str(exc)[:200]}
-            gold = item["answer"]
-            ok = 1 if _norm(pred) == _norm(gold) else 0  # STRICT exact-match
-            results.append({
-                "id": str(item["id"]), "hard": ok, "soft": float(ok),
-                "question": item["question"], "predicted_answer": pred, "gold": gold,
-                "fail_reason": "" if ok else f"expected '{gold}', got '{pred}'",
-            })
-        return results
-
-    def reflect(self, results: list[dict], skill_content: str, out_dir: str, **kwargs):
-        return run_minibatch_reflect(
-            results=results, skill_content=skill_content,
-            prediction_dir=kwargs.get("prediction_dir", os.path.join(out_dir, "predictions")),
-            patches_dir=kwargs.get("patches_dir", os.path.join(out_dir, "patches")),
-            workers=self.analyst_workers, failure_only=self.failure_only,
-            minibatch_size=self.minibatch_size, edit_budget=self.edit_budget,
-            random_seed=kwargs.get("random_seed"),
-            error_system=self.get_error_minibatch_prompt(),
-            success_system=self.get_success_minibatch_prompt(),
-            step_buffer_context=kwargs.get("step_buffer_context", ""),
-            update_mode=getattr(self, "_cfg", {}).get("skill_update_mode", "patch"),
-        )
-
-    def get_task_types(self) -> list[str]:
-        return ["qa"]
diff --git a/commands/skillopt/examples/toyqa/config.yaml b/commands/skillopt/examples/toyqa/config.yaml
deleted file mode 100644
index d7cfbc64..00000000
--- a/commands/skillopt/examples/toyqa/config.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-_base_: ../_base/default.yaml   # vendored copy of the upstream package default (self-contained example)
-
-# Toy QA — minimal end-to-end loop on the local Claude Code CLI (claude -p).
-# No external data, no API key: target + optimizer both shell out to `claude`.
-# Run via:  python commands/skillopt/bin/skillopt_run.py   (registers `toyqa`)
-
-model:
-  optimizer: claude-sonnet-4-6
-  target: claude-sonnet-4-6
-  optimizer_backend: claude_chat
-  target_backend: claude_chat
-  reasoning_effort: low
-
-train:
-  num_epochs: 1
-  train_size: 6
-  batch_size: 3
-  accumulation: 1
-  seed: 42
-
-gradient:
-  minibatch_size: 4
-  merge_batch_size: 4
-  analyst_workers: 2
-  failure_only: false
-
-optimizer:
-  learning_rate: 2
-  min_learning_rate: 1
-  lr_scheduler: constant
-  lr_control_mode: fixed
-  skill_update_mode: patch
-  use_slow_update: false
-  use_meta_skill: false
-
-evaluation:
-  sel_env_num: 5
-  test_env_num: 0
-
-env:
-  name: toyqa
-  skill_init: skills/initial.md
-  max_completion_tokens: 512
-  workers: 2
-  limit: 0
diff --git a/commands/skillopt/examples/toyqa/dataloader.py b/commands/skillopt/examples/toyqa/dataloader.py
deleted file mode 100644
index 50918988..00000000
--- a/commands/skillopt/examples/toyqa/dataloader.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""In-memory toy QA dataloader — no external split files.
-
-Sidecar-owned reference adapter for `/skillopt train`. Proves the loop runs
-end-to-end on the local Claude Code CLI; copy this dir as a template for a real
-auto-scorable sidecar domain (hexa-lang strict eval, /verify pass-rate, etc.).
-"""
-from __future__ import annotations
-
-from skillopt.datasets.base import BaseDataLoader, BatchSpec
-
-# Format-sensitive questions: an EMPTY skill tends to answer in a full sentence
-# ("The chemical symbol for gold is Au.") → STRICT exact-match on the last line FAILS.
-# That failure is the gradient — the optimizer should learn a "reply with ONLY the
-# value, no sentence" rule, after which the held-out eval score rises (an accepted edit).
-_TRAIN = [
-    {"id": "t1", "question": "What is the chemical symbol for gold?", "answer": "Au"},
-    {"id": "t2", "question": "How many sides does a hexagon have?", "answer": "6"},
-    {"id": "t3", "question": "What is the past tense of the verb 'run'?", "answer": "ran"},
-    {"id": "t4", "question": "Round 3.14159 to two decimal places.", "answer": "3.14"},
-    {"id": "t5", "question": "What is 7 times 8?", "answer": "56"},
-    {"id": "t6", "question": "Give the ISO two-letter country code for Germany.", "answer": "DE"},
-]
-_EVAL = [
-    {"id": "e1", "question": "What is the chemical symbol for sodium?", "answer": "Na"},
-    {"id": "e2", "question": "How many sides does a pentagon have?", "answer": "5"},
-    {"id": "e3", "question": "What is the past tense of the verb 'go'?", "answer": "went"},
-    {"id": "e4", "question": "Give the ISO two-letter country code for France.", "answer": "FR"},
-    {"id": "e5", "question": "What is 9 times 6?", "answer": "54"},
-]
-
-
-class ToyQALoader(BaseDataLoader):
-    def __init__(self, seed: int = 42, limit: int = 0) -> None:
-        self.seed = seed
-        self.limit = limit
-
-    def setup(self, cfg: dict) -> None:
-        return None
-
-    def get_train_size(self) -> int:
-        return len(_TRAIN)
-
-    def build_train_batch(self, batch_size: int, seed: int, **kwargs) -> BatchSpec:
-        n = len(_TRAIN)
-        start = seed % n
-        items = [_TRAIN[(start + i) % n] for i in range(min(batch_size, n))]
-        return BatchSpec(phase="train", split="train", seed=seed,
-                         batch_size=len(items), payload=items)
-
-    def build_eval_batch(self, env_num: int, split: str, seed: int, **kwargs) -> BatchSpec:
-        items = _EVAL if not env_num else _EVAL[:env_num]
-        return BatchSpec(phase="eval", split=split, seed=seed,
-                         batch_size=len(items), payload=items)
diff --git a/commands/skillopt/examples/toyqa/skills/initial.md b/commands/skillopt/examples/toyqa/skills/initial.md
deleted file mode 100644
index 54ad5a6b..00000000
--- a/commands/skillopt/examples/toyqa/skills/initial.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# QA Skill
-(No learned rules yet. Rules will be added through reflection.)
diff --git a/hooks/skillopt-hook/.claude-plugin/plugin.json b/hooks/skillopt-hook/.claude-plugin/plugin.json
deleted file mode 100644
index 51f9de08..00000000
--- a/hooks/skillopt-hook/.claude-plugin/plugin.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "name": "skillopt-hook",
-  "description": "0.1.0 (2026-06-07) SessionStart hook — auto-USES a trained skill without a command. If the user has activated a learned skill (`/skillopt activate <skill.md>` → ~/.sidecar/skillopt/active-skill.md), this injects it as additionalContext at session start so the agent applies it automatically (the prefs-hook split: command writes the SSOT, hook auto-injects it). OPT-IN + safe: silent when nothing is activated; NEVER trains (training is cost-bearing — stays a command/agent decision); always exits 0 (fail-open, never blocks a session). An optional `~/.sidecar/skillopt/agent-active` marker adds a one-line nudge telling the agent to PROPOSE (never auto-run) `/skillopt train` for repeatable auto-scorable tasks. Hook half of the skillopt USE-vs-TRAIN split; the `/skillopt` command plugin is the TRAIN/activate half.",
-  "version": "0.1.0",
-  "author": { "name": "dancinlab" },
-  "repository": "https://github.com/dancinlab/sidecar",
-  "license": "MIT",
-  "keywords": ["claude-code", "hook", "session-start", "skillopt", "skill-injection", "auto-consume"]
-}
diff --git a/hooks/skillopt-hook/README.md b/hooks/skillopt-hook/README.md
deleted file mode 100644
index 7599d5f1..00000000
--- a/hooks/skillopt-hook/README.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# skillopt-hook
-
-SessionStart hook — **auto-USES a trained skill without typing a command.**
-
-The skillopt USE-vs-TRAIN split (same shape as the prefs command/hook split):
-
-```
-[ /skillopt activate <skill.md> ]  ──▶  ~/.sidecar/skillopt/active-skill.md   (SSOT)
-                                              │
-[ SessionStart ] ──▶ skillopt-hook ──▶ inject active-skill.md as additionalContext
-                                              │
-                                       agent applies it automatically (no command)
-```
-
-## Behavior
-
-- If `~/.sidecar/skillopt/active-skill.md` exists → inject it as session context.
-- If `~/.sidecar/skillopt/agent-active` (opt-in marker) exists → add a one-line nudge
-  telling the agent to **propose** (never auto-run) `/skillopt train` for repeatable
-  auto-scorable tasks.
-- Otherwise → **silent** (no injection).
-
-## Safety
-
-- **Never trains.** Training is cost-bearing (many `claude -p` calls) → it stays a
-  command (`/skillopt train`) or an agent proposal, never an automatic hook action.
-- **Fail-open.** Always exits 0; a hiccup never blocks a session.
-- **Opt-in.** Nothing is injected until the user activates a skill — no global noise.
-
-## Manage (via the `skillopt` command plugin)
-
-```
-/skillopt activate <skill.md>   turn auto-use ON  (copy → active-skill.md)
-/skillopt deactivate            turn auto-use OFF (remove active-skill.md)
-/skillopt agent-active on|off   toggle the train-proposal nudge
-```
-
-Hook half of the split; `/skillopt` (the `skillopt` command plugin) is the
-TRAIN/activate half. `NO env opt-out` — the SSOT files themselves are the switch.
diff --git a/hooks/skillopt-hook/bin/skillopt_inject.sh b/hooks/skillopt-hook/bin/skillopt_inject.sh
deleted file mode 100755
index 3db9b5a2..00000000
--- a/hooks/skillopt-hook/bin/skillopt_inject.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-# skillopt_inject — SessionStart hook. Emits the ACTIVE learned skill (if the user
-# activated one via `/skillopt activate`) as additionalContext, so a trained skill
-# is auto-used WITHOUT typing a command. NEVER trains (cost) and NEVER fails the
-# session (always exit 0). Silent when nothing is activated.
-set -u
-cat >/dev/null 2>&1   # drain the hook payload on stdin
-
-DIR="${SKILLOPT_HOME:-$HOME/.sidecar/skillopt}"
-SKILL="$DIR/active-skill.md"
-NUDGE="$DIR/agent-active"   # opt-in marker for the train-proposal nudge
-
-ctx=""
-if [ -f "$SKILL" ]; then
-  body=$(cat "$SKILL" 2>/dev/null)
-  ctx="# 🎓 Active learned skill (skillopt) — apply this as task guidance when it fits:
-
-$body"
-fi
-if [ -f "$NUDGE" ]; then
-  line="🎓 skillopt active-use: when you see a repeatable, auto-scorable task with no learned skill yet, PROPOSE \`/skillopt train\` (never auto-run it)."
-  if [ -n "$ctx" ]; then ctx="$ctx
-
-$line"; else ctx="$line"; fi
-fi
-
-[ -n "$ctx" ] || exit 0   # nothing activated → stay silent
-
-# Emit additionalContext as JSON (hookEventName must match the firing event).
-if command -v python3 >/dev/null 2>&1; then
-  python3 - "$ctx" <<'PY'
-import json, sys
-print(json.dumps({"hookSpecificOutput": {"hookEventName": "SessionStart",
-                                          "additionalContext": sys.argv[1]}}))
-PY
-fi
-exit 0
diff --git a/hooks/skillopt-hook/hooks/hooks.json b/hooks/skillopt-hook/hooks/hooks.json
deleted file mode 100644
index 7be545cb..00000000
--- a/hooks/skillopt-hook/hooks/hooks.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "hooks": {
-    "SessionStart": [
-      { "hooks": [ { "type": "command", "command": "sh \"${CLAUDE_PLUGIN_ROOT}/bin/skillopt_inject.sh\"" } ] }
-    ]
-  }
-}
diff --git a/skills/domain/.claude-plugin/plugin.json b/skills/domain/.claude-plugin/plugin.json
index 4c324368..b2f39282 100644
--- a/skills/domain/.claude-plugin/plugin.json
+++ b/skills/domain/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "domain",
-  "description": "0.10.1 (2026-06-03) `_is_name` now accepts a DIGIT-LEADING NAME — first char may be an UPPERCASE letter OR a digit 0-9, so `/domain init 8VERB` works (previously rejected: digit-leading start); subsequent chars unchanged (UPPERCASE/digit/`-`/`+`). 0.10.0 — bare `/domain` + `/domain list` now LINT for scattered report-like `.md` (a basename token audit|report|verdict|gap|finding|impl OR an ISO date like `2026-06-02`) sitting loose at the repo root or under `docs/` WHILE a domain is active, and emit a `⚠ 분리보관 감지` advisory naming each stray with its `/domain absorb <file>` runbook (excludes the domain's own pair / CHANGELOG / README / `.log.md` / already-absorbed pointer files; read-only). 0.9.0 — new `absorb <file> [--state]` verb enforces combined-storage (합산보관) over scattered docs/*.md: folds an existing separate report/finding `.md` into the ACTIVE domain's single doc pair (default → <NAME>.log.md as a dated `## <ISO> — absorbed <rel>` entry embedding the body; `--state` → <NAME>.md snapshot), then replaces the source with a one-line `<!-- absorbed into … -->` pointer (idempotent — an already-absorbed pointer file is skipped; refuses to absorb the domain's own pair). Resolves the active domain via the same _get_active / _snap_path / _log_path logic the other verbs use. Maintain UPPERCASE <NAME>.md (snapshot = `@goal:` final-goal declaration + `- [ ]` progress milestones) + sister <NAME>.log.md (append-only step log) at project root. NAME = UPPERCASE-or-digit-start + UPPERCASE/digits/`-`/`+` (e.g. `TTR-LM`, `RTSC+HTS`); `_` is rejected (use `-`); `+` enables meta-domain composition (`RTSC+HTS` → RTSC+HTS.md + RTSC+HTS.log.md). Subcommands: /domain init <NAME> [<dir>] (scaffold files + register a DOMAINS.tape roster row; optional <dir> places the pair anywhere, e.g. 
domains/) · /domain set <NAME> (select session active — bare /domain <NAME> shortcut) · /domain list (alias ls — repo-wide index: with a DOMAINS.tape roster present it is AUTHORITATIVE — tables each registered domain with ★ = active · @goal · progress bar · location; progress/@goal stay DERIVED (read live from each snapshot) so the checked-in roster holds only stable NAME→path data and never churns on a milestone flip; flags ghosts + unregistered disk domains; no roster → legacy disk-scan of root + one folder level) · /domain list --sync (reconcile DOMAINS.tape with disk — append unregistered domains, bootstraps the roster from an existing repo) · /domain goal <text> (declare the FINAL goal — sets the snapshot's `@goal:` line) · /domain milestone <text> (alias ms — add `- [ ]` progress milestone) · /domain title <text> (alias subtitle — set an OPTIONAL `@title:` display header: icon · name · alias, e.g. `🧠 IIT4 — 의식 측정자(尺)`; rendered by bare /domain + set in place of the plain `active domain: NAME`; absence keeps current output, no lint warning) · /domain done <match> (flip a milestone [ ]→[x], else flip a log task) · /domain (show active + @title-or-name + @goal + progress bar `▓▓▓░░ NN% · done/total` + lint) · /domain todo <task> / <task> / new <header> (log). Lint: warns if snapshot has no `@goal:` OR no milestones (NOT `@title:` — it is optional). Progress = milestone completion (snapshot), not log-based. Path resolution: a DOMAINS.tape roster row (`@domain <NAME> := \"<path>\"`) wins FIRST — letting a domain live at ANY path (e.g. `domains/RUNTIME/RUNTIME.md`, `sub/deep/X/X.md`), lifting the old root+one-folder-level limit; absent a roster entry, every verb falls back to (1) existing root `<NAME>.md` → (2) existing folder-nested `<NAME>/<NAME>.md` → (3) root default (fresh scaffold), with the log following the snapshot's directory, so folder-nested meta-domains no longer regenerate an empty root scaffold on each call. 
The folder-nested fallback only matches when the nested file's first non-empty line is the domain header `# <NAME>` (guards against false-matching an unrelated same-named file — e.g. a digest `<name>/<NAME>.md` — on case-insensitive filesystems).",
-  "version": "0.10.1",
+  "description": "0.11.0 (2026-06-07) ML subverbs unify the retired `skillopt` + `skillopt-hook` plugins under `/domain`: `/domain doctor` (skill-train readiness) · `/domain train [--bg]` (NATIVE pip-free SkillOpt loop — rollout → reflect → edit → held-out gate via `claude -p` = your Claude Code subscription, NOT a metered API; only a SEPARATE skill.md is optimized — the domain's own DOMAIN.md / verdicts / log are NEVER touched) · `consume <skill.md>` · `activate`/`deactivate` (write ~/.sidecar/domain-skill/active-skill.md) · `status`/`log`. Bundled `examples/toyqa` (6 format-sensitive QA items) proves the loop end-to-end with NO external data/pip. ML verbs route to `bin/_domain_ml.sh` (+ `bin/_domain_ml.py` engine); ALL existing domain verbs route to `_domain.hexa` UNCHANGED. 0.10.1 (2026-06-03) `_is_name` now accepts a DIGIT-LEADING NAME — first char may be an UPPERCASE letter OR a digit 0-9, so `/domain init 8VERB` works (previously rejected: digit-leading start); subsequent chars unchanged (UPPERCASE/digit/`-`/`+`). 0.10.0 — bare `/domain` + `/domain list` now LINT for scattered report-like `.md` (a basename token audit|report|verdict|gap|finding|impl OR an ISO date like `2026-06-02`) sitting loose at the repo root or under `docs/` WHILE a domain is active, and emit a `⚠ 분리보관 감지` advisory naming each stray with its `/domain absorb <file>` runbook (excludes the domain's own pair / CHANGELOG / README / `.log.md` / already-absorbed pointer files; read-only). 0.9.0 — new `absorb <file> [--state]` verb enforces combined-storage (합산보관) over scattered docs/*.md: folds an existing separate report/finding `.md` into the ACTIVE domain's single doc pair (default → <NAME>.log.md as a dated `## <ISO> — absorbed <rel>` entry embedding the body; `--state` → <NAME>.md snapshot), then replaces the source with a one-line `<!-- absorbed into … -->` pointer (idempotent — an already-absorbed pointer file is skipped; refuses to absorb the domain's own pair). 
Resolves the active domain via the same _get_active / _snap_path / _log_path logic the other verbs use. Maintain UPPERCASE <NAME>.md (snapshot = `@goal:` final-goal declaration + `- [ ]` progress milestones) + sister <NAME>.log.md (append-only step log) at project root. NAME = UPPERCASE-or-digit-start + UPPERCASE/digits/`-`/`+` (e.g. `TTR-LM`, `RTSC+HTS`); `_` is rejected (use `-`); `+` enables meta-domain composition (`RTSC+HTS` → RTSC+HTS.md + RTSC+HTS.log.md). Subcommands: /domain init <NAME> [<dir>] (scaffold files + register a DOMAINS.tape roster row; optional <dir> places the pair anywhere, e.g. domains/) · /domain set <NAME> (select session active — bare /domain <NAME> shortcut) · /domain list (alias ls — repo-wide index: with a DOMAINS.tape roster present it is AUTHORITATIVE — tables each registered domain with ★ = active · @goal · progress bar · location; progress/@goal stay DERIVED (read live from each snapshot) so the checked-in roster holds only stable NAME→path data and never churns on a milestone flip; flags ghosts + unregistered disk domains; no roster → legacy disk-scan of root + one folder level) · /domain list --sync (reconcile DOMAINS.tape with disk — append unregistered domains, bootstraps the roster from an existing repo) · /domain goal <text> (declare the FINAL goal — sets the snapshot's `@goal:` line) · /domain milestone <text> (alias ms — add `- [ ]` progress milestone) · /domain title <text> (alias subtitle — set an OPTIONAL `@title:` display header: icon · name · alias, e.g. `🧠 IIT4 — 의식 측정자(尺)`; rendered by bare /domain + set in place of the plain `active domain: NAME`; absence keeps current output, no lint warning) · /domain done <match> (flip a milestone [ ]→[x], else flip a log task) · /domain (show active + @title-or-name + @goal + progress bar `▓▓▓░░ NN% · done/total` + lint) · /domain todo <task> / <task> / new <header> (log). Lint: warns if snapshot has no `@goal:` OR no milestones (NOT `@title:` — it is optional). 
Progress = milestone completion (snapshot), not log-based. Path resolution: a DOMAINS.tape roster row (`@domain <NAME> := \"<path>\"`) wins FIRST — letting a domain live at ANY path (e.g. `domains/RUNTIME/RUNTIME.md`, `sub/deep/X/X.md`), lifting the old root+one-folder-level limit; absent a roster entry, every verb falls back to (1) existing root `<NAME>.md` → (2) existing folder-nested `<NAME>/<NAME>.md` → (3) root default (fresh scaffold), with the log following the snapshot's directory, so folder-nested meta-domains no longer regenerate an empty root scaffold on each call. The folder-nested fallback only matches when the nested file's first non-empty line is the domain header `# <NAME>` (guards against false-matching an unrelated same-named file — e.g. a digest `<name>/<NAME>.md` — on case-insensitive filesystems).",
+  "version": "0.11.0",
   "author": {
     "name": "dancinlab"
   },
diff --git a/skills/domain/bin/_domain_ml.py b/skills/domain/bin/_domain_ml.py
new file mode 100755
index 00000000..82c13663
--- /dev/null
+++ b/skills/domain/bin/_domain_ml.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""_domain_ml — native, pip-free skill-document optimizer for `/domain train`.
+
+The SkillOpt loop (rollout → reflect → edit → held-out gate) in ~120 lines with NO
+`pip install skillopt` dependency. Backend = the local `claude -p` CLI (your Claude
+Code subscription — not a metered API). Only a skill.md (a text file) is optimized;
+the domain's own DOMAIN.md / verdicts / log are NEVER touched.
+
+Usage:
+    _domain_ml.py --skill seed.md --tasks train.jsonl --eval held.jsonl \
+                  [--steps 3] [--out DIR] [--model claude-sonnet-4-6]
+
+tasks/eval JSONL: one object per line {"id","question","answer"} (exact-match scored).
+A run that completes writes <out>/best_skill.md + <out>/loop.log and exits 0; a failed `claude` call only scores as a miss.
+"""
+from __future__ import annotations
+
+import argparse, json, os, re, subprocess, sys
+
+CLAUDE = os.environ.get("CLAUDE_CLI_BIN", "claude")  # CLI executable to run; override via CLAUDE_CLI_BIN
+SETTINGS = os.environ.get("DOMAIN_ML_SETTING_SOURCES", "")  # empty = clean room (no governance skew)
+
+
+def claude(system: str, prompt: str, model: str, timeout: int = 180) -> str:
+    """Run one `claude -p` call (non-blank `system` is appended to the system prompt); return stripped stdout, or '__ERR__ <exc>' if the call raises."""
+    argv = [CLAUDE, "-p", "--output-format", "text", "--permission-mode", "dontAsk",
+            "--setting-sources", SETTINGS, "--model", model]
+    argv += ["--append-system-prompt", system] if system.strip() else []
+    try:
+        proc = subprocess.run([*argv, prompt], capture_output=True, text=True, timeout=timeout)
+    except Exception as exc:  # noqa: BLE001
+        return f"__ERR__ {exc}"
+    return (proc.stdout or "").strip()
+
+
+def _norm(s: str) -> str:  # lowercase + drop punctuation, but keep a '.' between digits and a '-' that leads a number (3.14 != 31.4, -5 != 5)
+    return re.sub(r"((?<=\d)\.(?=\d)|(?<![a-z0-9])-(?=\d))|[^a-z0-9 ]", lambda m: m.group(1) or "", (s or "").lower()).strip()
+
+
+def _answer(text: str) -> str:  # last line of the stripped reply, itself stripped; "" when the reply is blank
+    return (text.strip().splitlines() or [""])[-1].strip()
+
+
+def score(skill: str, items: list[dict], model: str) -> tuple[float, list[dict]]:
+    """Roll out every item with `skill` as claude()'s `system` text; return (exact-match accuracy, per-item rows)."""
+    rows: list[dict] = []
+    for item in items:
+        guess = _answer(claude(skill, item["question"], model))
+        hit = int(_norm(guess) == _norm(item["answer"]))
+        rows.append({"id": item["id"], "ok": hit, "pred": guess, "gold": item["answer"]})
+    return sum(row["ok"] for row in rows) / max(1, len(rows)), rows
+
+
+def reflect(fails: list[dict], skill: str, model: str) -> str:
+    """Ask the model for rules fixing the first 8 failures; return up to 3 '- ' bullet lines joined by newlines ('' if none)."""
+    listing = "\n".join("- gold='{}' but got='{}'".format(f["gold"], f["pred"]) for f in fails[:8])
+    sys_p = ("You improve a natural-language skill document for a frozen agent. "
+             "Given failures, output ONLY 1-3 concise, generalizable bullet rules "
+             "(each starting with '- ') that would fix them. No preamble, no prose.")
+    reply = claude(sys_p, "Current skill:\n" + skill + "\n\nFailures:\n" + listing + "\n\nNew rules:", model)
+    return "\n".join([ln.rstrip() for ln in reply.splitlines() if ln.strip().startswith("- ")][:3])
+
+
+def main() -> None:
+    """CLI entry: baseline the seed skill on held-out, run up to --steps reflect→edit→gate rounds, write best_skill.md + loop.log under --out."""
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--skill", required=True)
+    ap.add_argument("--tasks", required=True)
+    ap.add_argument("--eval", dest="eval_path", required=True)
+    ap.add_argument("--steps", type=int, default=3)
+    ap.add_argument("--out", default="domain-train-out")
+    ap.add_argument("--model", default=os.environ.get("DOMAIN_ML_MODEL", "claude-sonnet-4-6"))
+    a = ap.parse_args()
+
+    def load(path: str) -> list[dict]:  # JSONL → list of dicts; blank lines skipped; handle closed on exit
+        with open(path, encoding="utf-8") as fh:
+            return [json.loads(ln) for ln in fh if ln.strip()]
+
+    os.makedirs(a.out, exist_ok=True)
+    out_skill = os.path.join(a.out, "best_skill.md")
+    with open(os.path.join(a.out, "loop.log"), "w", encoding="utf-8") as log:  # explicit utf-8: messages contain ✅/→/·; closed even if a step raises
+        def emit(m: str) -> None:
+            print(m); log.write(m + "\n"); log.flush()
+        train, held = load(a.tasks), load(a.eval_path)
+        with open(a.skill, encoding="utf-8") as fh:
+            skill = fh.read()
+        base, _ = score(skill, held, a.model)
+        best, best_acc = skill, base
+        emit(f"[baseline] held-out acc = {base:.3f} ({len(held)} items) · model={a.model}")
+        for step in range(1, a.steps + 1):
+            tr_acc, rows = score(best, train, a.model)
+            fails = [r for r in rows if not r["ok"]]
+            emit(f"[step {step}/{a.steps}] train acc={tr_acc:.3f} · fails={len(fails)}")
+            if not fails:
+                emit("  no failures → nothing to learn this step"); continue
+            rules = reflect(fails, best, a.model)
+            if not rules:
+                emit("  optimizer proposed 0 rules → skill unchanged"); continue
+            cand = best.rstrip() + "\n" + rules + "\n"
+            cand_acc, _ = score(cand, held, a.model)
+            if cand_acc > best_acc:  # gate: keep the candidate only on a strict held-out improvement
+                best, best_acc = cand, cand_acc
+                emit(f"  ✅ gate ACCEPT: held-out {best_acc:.3f} (↑) · added:\n    " + rules.replace("\n", "\n    "))
+            else:
+                emit(f"  ✗ gate REJECT: cand {cand_acc:.3f} ≤ best {best_acc:.3f} → unchanged")
+        with open(out_skill, "w", encoding="utf-8") as fh:
+            fh.write(best)
+        emit(f"[done] best held-out acc {base:.3f} → {best_acc:.3f} · wrote {out_skill}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/domain/bin/_domain_ml.sh b/skills/domain/bin/_domain_ml.sh
new file mode 100755
index 00000000..65dc546b
--- /dev/null
+++ b/skills/domain/bin/_domain_ml.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# _domain_ml — ML subverbs for /domain (doctor · train · consume · activate ·
+# deactivate · status · log). Native + pip-free: the loop shells out to `claude -p`
+# (your Claude Code subscription, NOT a metered API). Only a skill.md is optimized;
+# the domain's own DOMAIN.md / verdicts / log are never touched.
+set -uo pipefail   # nounset + pipefail (errexit is not enabled)
+
+HERE="$(cd "$(dirname "$0")" && pwd)"          # .../domain/<ver>/bin
+ENGINE="$HERE/_domain_ml.py"   # Python optimizer that train runs
+EX="$HERE/../examples/toyqa"   # bundled dataset: default --skill/--tasks/--eval
+PY="${DOMAIN_ML_PY:-python3}"   # interpreter; override via DOMAIN_ML_PY
+HOME_DIR="${DOMAIN_ML_HOME:-$HOME/.sidecar/domain-skill}"   # state dir: active-skill.md, train.pid, train-*.log
+
+doctor() {  # readiness report; probes the same CLI binary the engine runs (CLAUDE_CLI_BIN, default claude)
+  local cli="${CLAUDE_CLI_BIN-claude}"; echo "🎓 /domain doctor — skill-train readiness (native · pip-free)"
+  if command -v "$cli" >/dev/null 2>&1; then echo "  ✅ claude CLI: $("$cli" --version 2>/dev/null || echo present)  ← only hard requirement"
+  else echo "  ⚠ claude CLI: MISSING (the loop shells out to 'claude -p')"; fi
+  [ -f "$ENGINE" ] && echo "  ✅ native engine: bin/_domain_ml.py (no pip install needed)" || echo "  ⚠ engine missing — /reload-plugins"
+  echo "  • backend: claude -p = your Claude Code subscription · NOT a metered API"
+  echo "  • run    : /domain train   (bundled examples/toyqa · writes best_skill.md)"
+}
+
+train() {  # parse flags, then run the engine in the foreground (exec) or detached under nohup with --bg
+  command -v "${CLAUDE_CLI_BIN-claude}" >/dev/null 2>&1 || echo "⚠ claude CLI missing — the run will fail at the first rollout."
+  local skill="$EX/skill.md" tasks="$EX/train.jsonl" eval="$EX/eval.jsonl" steps=3 out="domain-train-out" bg=0 args=()
+  while [ $# -gt 0 ]; do case "$1" in
+    --bg|-b) bg=1; shift ;;
+    --skill) skill="$2"; shift 2 ;;
+    --tasks) tasks="$2"; shift 2 ;;
+    --eval)  eval="$2";  shift 2 ;;
+    --steps) steps="$2"; shift 2 ;;
+    --out)   out="$2";   shift 2 ;;
+    *) args+=("$1"); shift ;;   # anything else is passed through to the engine untouched
+  esac; done
+  # ${args[@]+...}: an empty array must expand to nothing; a bare "${args[@]}" trips set -u on bash < 4.4 (e.g. macOS 3.2)
+  set -- --skill "$skill" --tasks "$tasks" --eval "$eval" --steps "$steps" --out "$out" ${args[@]+"${args[@]}"}
+  if [ "$bg" = 1 ]; then
+    mkdir -p "$HOME_DIR"; local log="$HOME_DIR/train-$(date +%Y%m%d-%H%M%S).log"
+    nohup "$PY" "$ENGINE" "$@" >"$log" 2>&1 & echo $! > "$HOME_DIR/train.pid"
+    echo "🌙 background train started — pid $(cat "$HOME_DIR/train.pid")"; echo "  log: $log · watch: /domain status"; return 0
+  fi
+  echo "▶ /domain train (native loop · examples/toyqa default)…"
+  exec "$PY" "$ENGINE" "$@"
+}
+
+_latest_log() { ls -1t "$HOME_DIR"/train-*.log 2>/dev/null | head -n 1; }   # newest train-*.log path; empty output when none exist
+status() {  # report the recorded --bg pid's liveness + key lines of the newest log; returns 0 even when no log exists yet
+  local pid="" alive="no" log; [ -f "$HOME_DIR/train.pid" ] && pid="$(cat "$HOME_DIR/train.pid")"
+  [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null && alive="yes"; log="$(_latest_log)"
+  echo "🌙 /domain train status"; echo "  running: $alive${pid:+ (pid $pid)}"; echo "  log    : ${log:-(none — /domain train --bg)}"
+  if [ -n "$log" ]; then echo "  ── progress ──"; grep -aE 'baseline|step |gate |done\]' "$log" 2>/dev/null | tail -8 | sed 's/^/    /'; fi; return 0
+}
+log_cmd() { local newest; newest="$(_latest_log)"; [ -n "$newest" ] && tail -n 40 "$newest" || echo "no train log yet — /domain train --bg"; }   # tail of the newest train log
+
+consume() {  # print <skill.md> between banner lines, then a count of its '- ' / '* ' bullet rules
+  local f="${1:-}" n; [ -n "$f" ] && [ -f "$f" ] || { echo "usage: /domain consume <skill.md>"; return 1; }
+  echo "===== SKILL DOCUMENT (adopt as this session's task guidance) ====="; cat "$f"
+  n="$(grep -cE '^[-*] ' "$f" 2>/dev/null || true)"; echo "===== END SKILL (${n:-0} rules) ====="   # grep -c already prints 0 (exit 1) on no match, so no '|| echo 0'
+}
+activate() {  # copy <skill.md> to $HOME_DIR/active-skill.md; reports failure (return 1) instead of claiming success when the copy fails
+  local f="${1:-}"; [ -n "$f" ] && [ -f "$f" ] || { echo "usage: /domain activate <skill.md>"; return 1; }
+  mkdir -p "$HOME_DIR" && cp "$f" "$HOME_DIR/active-skill.md" || { echo "✗ could not write $HOME_DIR/active-skill.md"; return 1; }
+  echo "✅ activated → $HOME_DIR/active-skill.md (a SessionStart hook can auto-inject it) · off: /domain deactivate"
+}
+deactivate() { rm -f "$HOME_DIR/active-skill.md"; echo "✅ deactivated — no skill auto-injected."; }   # rm -f: succeeds even when no skill is active
+
+# Dispatch: no verb (or an empty one) means doctor. Every verb except `log` is
+# implemented by the same-named function; only train/consume/activate receive
+# the remaining arguments.
+verb="${1:-doctor}"; [ $# -gt 0 ] && shift
+case "$verb" in
+  doctor|status|deactivate) "$verb" ;;
+  train|consume|activate)   "$verb" "$@" ;;
+  log)                      log_cmd ;;
+  *)                        echo "unknown ML subverb: $verb"; exit 2 ;;
+esac
diff --git a/skills/domain/commands/domain.md b/skills/domain/commands/domain.md
index 28aa5771..4dcbcc71 100644
--- a/skills/domain/commands/domain.md
+++ b/skills/domain/commands/domain.md
@@ -1,13 +1,21 @@
 ---
 description: UPPERCASE <DOMAIN>.md (snapshot = `@goal:` final goal + `- [ ]` progress milestones) + <DOMAIN>.log.md (append-only step log). NAME = UPPERCASE-start + UPPERCASE/digits/`-`/`+` (e.g. `TTR-LM`, `RTSC+HTS`); `_` rejected (use `-`); `+` enables meta-domain composition. Subcommands — init `<NAME> [<dir>]` = scaffold files + register a `DOMAINS.tape` roster row (optional `<dir>` places the pair anywhere, e.g. `domains/`) · set `<NAME>` (or bare `<NAME>`) = SELECT session active · list (alias `ls`) = repo-wide index: a `DOMAINS.tape` roster (NAME→path) is AUTHORITATIVE so domains live at any path; progress/@goal stay DERIVED (checked-in roster never churns); flags ghosts + unregistered (★ = active · @goal · progress · location) · list `--sync` = reconcile roster with disk (bootstraps it) · goal `<text>` = declare FINAL goal (sets `@goal:`) · milestone `<text>` (alias `ms`) = add `- [ ]` milestone · title `<text>` (alias `subtitle`) = set optional `@title:` display header (icon · name · alias, e.g. `🧠 IIT4 — 의식 측정자(尺)`) · done `<match>` = flip a milestone, else a log task · absorb `<file> [--state]` = fold a scattered report `.md` into the active `<DOMAIN>.log.md` (or `<DOMAIN>.md` with `--state`) + leave a one-line pointer (합산보관) · bare = show active (title or name) + @goal + progress bar `▓▓▓░░ NN% · done/total` + lint warnings (no @goal / no milestones; `@title:` optional) · todo `<text>` / `<text>` / new `<header>` = log.
-argument-hint: "init <NAME> [<dir>] | set <NAME> | list [--sync] | goal <text> | milestone <text> | title <text> | done <match>"
+argument-hint: "init <NAME> [<dir>] | set <NAME> | list [--sync] | goal <text> | milestone <text> | title <text> | done <match> | doctor | train [--bg] | consume <skill.md> | activate <skill.md> | deactivate | status | log"
 allowed-tools: Bash
 ---
 
-!`H="$CLAUDE_PLUGIN_ROOT/bin/_domain.hexa"
-if [ ! -f "$H" ]; then
+!`ROOT="$CLAUDE_PLUGIN_ROOT"
+if [ -z "$ROOT" ] || [ ! -d "$ROOT/bin" ]; then
     V="$(ls -1 "$HOME/.claude/plugins/cache/sidecar/domain" 2>/dev/null | sort -V | tail -1)"
-    [ -n "$V" ] && H="$HOME/.claude/plugins/cache/sidecar/domain/$V/bin/_domain.hexa"
+    [ -n "$V" ] && ROOT="$HOME/.claude/plugins/cache/sidecar/domain/$V"
 fi
-[ -f "$H" ] || { echo "✗ _domain.hexa not found — run /reload-plugins or hx install sidecar"; exit 1; }
-hexa run "$H" $ARGUMENTS`
+set -- $ARGUMENTS
+case "${1:-}" in
+  doctor|train|consume|activate|deactivate|status|log)
+    M="$ROOT/bin/_domain_ml.sh"
+    if [ -f "$M" ]; then bash "$M" "$@"; else echo "✗ _domain_ml.sh not found — /reload-plugins"; exit 1; fi ;;
+  *)
+    H="$ROOT/bin/_domain.hexa"
+    [ -f "$H" ] || { echo "✗ _domain.hexa not found — run /reload-plugins or hx install sidecar"; exit 1; }
+    hexa run "$H" $ARGUMENTS ;;
+esac`
diff --git a/skills/domain/examples/toyqa/eval.jsonl b/skills/domain/examples/toyqa/eval.jsonl
new file mode 100644
index 00000000..16e3d587
--- /dev/null
+++ b/skills/domain/examples/toyqa/eval.jsonl
@@ -0,0 +1,5 @@
+{"id":"e1","question":"What is the chemical symbol for sodium?","answer":"Na"}
+{"id":"e2","question":"How many sides does a pentagon have?","answer":"5"}
+{"id":"e3","question":"What is the past tense of the verb 'go'?","answer":"went"}
+{"id":"e4","question":"Give the ISO two-letter country code for France.","answer":"FR"}
+{"id":"e5","question":"What is 9 times 6?","answer":"54"}
diff --git a/skills/domain/examples/toyqa/skill.md b/skills/domain/examples/toyqa/skill.md
new file mode 100644
index 00000000..24711361
--- /dev/null
+++ b/skills/domain/examples/toyqa/skill.md
@@ -0,0 +1,2 @@
+# QA Skill
+(No learned rules yet. Rules are added by /domain train.)
diff --git a/skills/domain/examples/toyqa/train.jsonl b/skills/domain/examples/toyqa/train.jsonl
new file mode 100644
index 00000000..353a8293
--- /dev/null
+++ b/skills/domain/examples/toyqa/train.jsonl
@@ -0,0 +1,6 @@
+{"id":"t1","question":"What is the chemical symbol for gold?","answer":"Au"}
+{"id":"t2","question":"How many sides does a hexagon have?","answer":"6"}
+{"id":"t3","question":"What is the past tense of the verb 'run'?","answer":"ran"}
+{"id":"t4","question":"Round 3.14159 to two decimal places.","answer":"3.14"}
+{"id":"t5","question":"What is 7 times 8?","answer":"56"}
+{"id":"t6","question":"Give the ISO two-letter country code for Germany.","answer":"DE"}