From 004d70a6f607acaaa4f5997d5e50ad5ad839ebf7 Mon Sep 17 00:00:00 2001
From: dancinlife <mk55911@proton.me>
Date: Sun, 7 Jun 2026 05:41:26 +0900
Subject: [PATCH] =?UTF-8?q?feat(skillopt=200.4.0):=20background=20train=20?=
 =?UTF-8?q?+=20harder=20example=20dataset=20(SKILLOPT.easy=20=C2=A78)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the two "더 다듬을 거리" ("things to polish further") items from domains/SKILLOPT.easy.md §8.

- 🌙 background train — `/skillopt train --bg` detaches via nohup (log under
  ~/.sidecar/skillopt/train-<ts>.log) and returns immediately; `/skillopt status`
  shows running-state + score/step progress, `/skillopt log` tails the latest log.
- 🎯 harder example — examples/toyqa swapped to 6 format-sensitive QA items
  (chemical symbols, ISO codes, rounding, past-tense) that an EMPTY skill answers
  in a full sentence → STRICT exact-match fails → a real learning gradient (the
  optimizer learns a "reply with only the value" rule, then the held-out gate
  rises). train_size 6 · batch 3 · sel 5 for stronger signal.

Validated: skill.sh `bash -n` ✅ · dataloader/config parse ✅ · status/help smoke ✅
· new dataset loads (6 train / 5 eval). g22 lockstep 0.4.0 + CHANGELOG.
---
 .claude-plugin/marketplace.json               |   4 +-
 CHANGELOG.md                                  |  12 +++++
 commands/skillopt/.claude-plugin/plugin.json  |   2 +-
 commands/skillopt/bin/skillopt.sh             |  41 ++++++++++++++++++
 .../__pycache__/__init__.cpython-314.pyc      | Bin 0 -> 173 bytes
 .../__pycache__/dataloader.cpython-314.pyc    | Bin 0 -> 4097 bytes
 commands/skillopt/examples/toyqa/config.yaml  |   6 +--
 .../skillopt/examples/toyqa/dataloader.py     |  22 +++++++---
 8 files changed, 74 insertions(+), 13 deletions(-)
 create mode 100644 commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc
 create mode 100644 commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 93eb48f..6f578cc 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -437,8 +437,8 @@
     {
       "name": "skillopt",
       "source": "./commands/skillopt",
-      "description": "0.3.0 (2026-06-07) EXECUTABLE CLI — `/skillopt` now runs directly via a `!`-exec dispatcher (`bin/skillopt.sh`, prefs-style: resolves its own cached install dir, so the user never types a long path). `/skillopt train` runs the loop in one token; `/skillopt doctor|ckpts|consume <skill.md>|help` all dispatch. Honest 0-edit runs reported as such; claude -p clarified = subscription (NOT metered). 0.2.0 (2026-06-07) sidecar-OWNED env adapter — the only domain code (run a task + score it) now ships IN the plugin at `commands/skillopt/examples/<domain>/`, NOT a clone of the upstream package; `bin/skillopt_run.py` injects sidecar adapters into the upstream hard-coded `_ENV_REGISTRY` at runtime (additive · survives `_register_builtins`) then runs the upstream trainer. Bundled reference `examples/toyqa/` = 4-item exact-match QA proving the loop runs end-to-end on local `claude -p` (no API key, no external data — target+optimizer both shell out to the Claude Code CLI = subscription, NOT metered); `examples/_base/default.yaml` vendors the upstream base config so examples are self-contained (plain `pip install skillopt` ships no `configs/`). Verified: baseline→rollout→reflect(on failures)→gate→test execute against real claude calls; an edit lands only when the optimizer judges a failure generalizable AND the held-out gate improves (no forced edit). 0.1.0 (2026-06-07) initial — /skillopt drives SkillOpt (microsoft/SkillOpt · `pip install skillopt`, arXiv:2605.23904), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (DL analogy: skill.md = weights · rollout = forward · reflect = backprop · gate = validation early-stop). 
Subverbs — doctor (pip pkg + claude CLI + harness wiring readiness) · ckpts (list the package's bundled pretrained skill.md artifacts) · consume <skill.md> (load a trained skill into THIS session as additive system guidance) · train <config.yaml> (run the loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec · CLAUDE_SETTING_SOURCES=user,project so the sidecar tapes ride along as FIXED scaffolding; refuses to run without a real scoring env adapter — no fabricated scores) · help. HARD INVARIANT: only the skill.md is optimized — the model, the governance tapes (= SkillOpt's fixed prompts/*_system.md), and every *-guard safety hook stay UNCHANGED; the trained skill is a SEPARATE per-domain file injected via --append-system-prompt; /skillopt never edits a .tape or a guard (a held-out UTILITY gate is not a SAFETY check — kept orthogonal). Wraps the upstream pip CLI; does not vendor or fork it. Companion: microsoft/SkillLens (arXiv:2605.23899).",
-      "version": "0.3.0"
+      "description": "0.4.0 (2026-06-07) BACKGROUND TRAIN + HARDER EXAMPLE — `/skillopt train --bg` detaches the run (nohup → log under ~/.sidecar/skillopt/), returns immediately; `/skillopt status` shows running-state + score/step progress, `/skillopt log` tails. The bundled `examples/toyqa` dataset swapped to 6 format-sensitive QA items (chemical symbols, ISO codes, rounding…) that an EMPTY skill answers in a full sentence → STRICT exact-match fails → a real learning gradient (the optimizer learns a 'reply with only the value' rule, then the held-out gate rises); train_size 6 · batch 3 · sel 5 for stronger signal. 0.3.0 (2026-06-07) EXECUTABLE CLI — `/skillopt` now runs directly via a `!`-exec dispatcher (`bin/skillopt.sh`, prefs-style: resolves its own cached install dir, so the user never types a long path). `/skillopt train` runs the loop in one token; `/skillopt doctor|ckpts|consume <skill.md>|help` all dispatch. Honest 0-edit runs reported as such; claude -p clarified = subscription (NOT metered). 0.2.0 (2026-06-07) sidecar-OWNED env adapter — the only domain code (run a task + score it) now ships IN the plugin at `commands/skillopt/examples/<domain>/`, NOT a clone of the upstream package; `bin/skillopt_run.py` injects sidecar adapters into the upstream hard-coded `_ENV_REGISTRY` at runtime (additive · survives `_register_builtins`) then runs the upstream trainer. Bundled reference `examples/toyqa/` = 4-item exact-match QA proving the loop runs end-to-end on local `claude -p` (no API key, no external data — target+optimizer both shell out to the Claude Code CLI = subscription, NOT metered); `examples/_base/default.yaml` vendors the upstream base config so examples are self-contained (plain `pip install skillopt` ships no `configs/`). Verified: baseline→rollout→reflect(on failures)→gate→test execute against real claude calls; an edit lands only when the optimizer judges a failure generalizable AND the held-out gate improves (no forced edit). 
0.1.0 (2026-06-07) initial — /skillopt drives SkillOpt (microsoft/SkillOpt · `pip install skillopt`, arXiv:2605.23904), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (DL analogy: skill.md = weights · rollout = forward · reflect = backprop · gate = validation early-stop). Subverbs — doctor (pip pkg + claude CLI + harness wiring readiness) · ckpts (list the package's bundled pretrained skill.md artifacts) · consume <skill.md> (load a trained skill into THIS session as additive system guidance) · train <config.yaml> (run the loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec · CLAUDE_SETTING_SOURCES=user,project so the sidecar tapes ride along as FIXED scaffolding; refuses to run without a real scoring env adapter — no fabricated scores) · help. HARD INVARIANT: only the skill.md is optimized — the model, the governance tapes (= SkillOpt's fixed prompts/*_system.md), and every *-guard safety hook stay UNCHANGED; the trained skill is a SEPARATE per-domain file injected via --append-system-prompt; /skillopt never edits a .tape or a guard (a held-out UTILITY gate is not a SAFETY check — kept orthogonal). Wraps the upstream pip CLI; does not vendor or fork it. Companion: microsoft/SkillLens (arXiv:2605.23899).",
+      "version": "0.4.0"
     },
     {
       "name": "sidecar",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8635767..eb625cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,18 @@ For the full audit trail, see `git log`.
 
 ---
 
+## 2026-06-07 — 🎓 skillopt 0.4.0 — 백그라운드 학습 + 더 어려운 예제 (SKILLOPT.easy §8 구현)
+
+SKILLOPT.easy.md 의 "더 다듬을 거리" 2개를 구현.
+
+- 🌙 **백그라운드 학습** — `/skillopt train --bg` 가 학습을 detached(nohup)로 던지고
+  즉시 반환(로그 `~/.sidecar/skillopt/train-<ts>.log`). `/skillopt status` 가 실행
+  여부 + 점수/스텝 진행을 보여주고, `/skillopt log` 가 tail. (세탁기 돌려놓고 딴 일)
+- 🎯 **더 어려운 예제 데이터** — `examples/toyqa` 를 format-sensitive 6문항(화학기호·
+  ISO 코드·반올림·과거형…)으로 교체. 빈 스킬은 문장으로 답해 STRICT exact-match 가
+  틀림 → 실제 학습 gradient 발생(옵티마이저가 "값만 답하기" 규칙을 배우면 held-out
+  gate 상승). train_size 6 · batch 3 · sel 5 로 신호 강화.
+
 ## 2026-06-07 — 🎓 skillopt 0.3.0 — 실행형 CLI (`/skillopt train` 한 줄)
 
 긴 python 경로 손입력 제거. `/skillopt`가 이제 `!`-exec 디스패처
diff --git a/commands/skillopt/.claude-plugin/plugin.json b/commands/skillopt/.claude-plugin/plugin.json
index 4bbd78b..fcd9954 100644
--- a/commands/skillopt/.claude-plugin/plugin.json
+++ b/commands/skillopt/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "skillopt",
   "description": "/skillopt — drive SkillOpt (microsoft/SkillOpt · `pip install skillopt`), the text-space optimizer that trains a natural-language SKILL DOCUMENT for a frozen Claude Code agent via rollout → reflect → edit → held-out gate (the DL analogy: the skill.md is the 'weights', rollout=forward, reflect=backprop, gate=validation early-stop). Subverbs — doctor (install + readiness check: pip pkg + claude CLI) · ckpts (list the bundled pretrained skill.md artifacts) · consume <skill.md> (load a trained skill into THIS session as system guidance) · train <config.yaml> (run the optimization loop with Claude Code as the target harness — env TARGET_BACKEND=claude_code_exec, CLAUDE_SETTING_SOURCES=user,project; needs a scoring env adapter) · help. ONLY the skill.md changes — the model, the sidecar governance tapes (= SkillOpt's fixed prompts/*_system.md scaffolding), and the *-guard safety hooks all stay fixed. Never auto-edits governance; it produces a SEPARATE per-domain skill.md the harness injects via --append-system-prompt. Wraps the upstream pip CLI; does not vendor it.",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "author": {
     "name": "dancinlab"
   },
diff --git a/commands/skillopt/bin/skillopt.sh b/commands/skillopt/bin/skillopt.sh
index c98d4b6..9698159 100755
--- a/commands/skillopt/bin/skillopt.sh
+++ b/commands/skillopt/bin/skillopt.sh
@@ -37,13 +37,49 @@ consume() {
   echo "===== END SKILL ($(grep -cE '^[-*] ' "$f" 2>/dev/null || echo 0) rules) ====="
 }
 
+LOGDIR="${SKILLOPT_LOG:-$HOME/.sidecar/skillopt}"  # where background-train logs and train.pid live; override with SKILLOPT_LOG
+
 train() {
   _have_pkg || { echo "skillopt not installed → pip install skillopt (then /skillopt train)"; return 1; }
   command -v claude >/dev/null 2>&1 || echo "⚠ claude CLI missing — the run will fail at the first rollout."
+  # Background mode (--bg/-b must be the FIRST argument): detach, log to a file, return immediately (watch with /skillopt status).
+  if [ "${1:-}" = "--bg" ] || [ "${1:-}" = "-b" ]; then
+    shift  # drop the flag; the remaining arguments are passed to skillopt_run.py unchanged
+    mkdir -p "$LOGDIR" || { echo "cannot create log dir: $LOGDIR"; return 1; }  # without the dir the redirect below fails and no run starts
+    local log="$LOGDIR/train-$(date +%Y%m%d-%H%M%S).log"  # one log file per launch, named by launch time (1 s resolution)
+    nohup "$PY" "$HERE/skillopt_run.py" "$@" >"$log" 2>&1 &
+    local pid=$!
+    echo "$pid" > "$LOGDIR/train.pid"  # single pid file, overwritten by every --bg launch
+    echo "🌙 background train started — pid $pid"
+    echo "  log   : $log"
+    echo "  watch : /skillopt status   ·   tail: /skillopt log"
+    return 0
+  fi
   echo "▶ launching SkillOpt train via the in-plugin launcher (registers sidecar envs)…"
   exec "$PY" "$HERE/skillopt_run.py" "$@"
 }
 
+_latest_log() { ls -1t "$LOGDIR"/train-*.log 2>/dev/null | head -1; }  # newest train-*.log by mtime; prints nothing when none exist
+
+status() {  # print whether the pid in train.pid is alive, then progress lines and the tail of the newest log
+  local pid="" alive="no"
+  [ -f "$LOGDIR/train.pid" ] && pid="$(cat "$LOGDIR/train.pid" 2>/dev/null)"
+  [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null && alive="yes"  # signal 0 only probes the pid; NOTE(review): a stale pid reused by an unrelated process would read as running
+  local log; log="$(_latest_log)"
+  echo "🌙 /skillopt status"
+  echo "  running : $alive${pid:+ (pid $pid)}"
+  echo "  log     : ${log:-(none — run: /skillopt train --bg)}"
+  [ -n "$log" ] || return 0  # no log yet → nothing further to summarize
+  echo "  ── progress (score / step lines) ──"
+  grep -aE 'selection hard|gate\[|STEP |EPOCH |best skill|new best|Test Results|done\]' "$log" 2>/dev/null | tail -8 | sed 's/^/    /'  # -a forces text mode; last 8 matching lines, indented
+  echo "  ── tail ──"; tail -4 "$log" | sed 's/^/    /'
+}
+
+log_cmd() {  # print the last 40 lines of the newest train log, or a hint when no log exists
+  local log; log="$(_latest_log)"
+  if [ -n "$log" ]; then tail -40 "$log"; else echo "no train log yet — run: /skillopt train --bg"; fi  # if/else, not A && B || C: a failing tail must not print the "no log" hint
+}
+
 usage() {
   cat <<'EOF'
 /skillopt — train a skill document for a frozen Claude Code agent (SkillOpt driver)
@@ -53,6 +89,9 @@ usage() {
   /skillopt ckpts                list bundled pretrained skill.md
   /skillopt consume <skill.md>   print a trained skill (agent adopts it this session)
   /skillopt train [--config X]   run rollout→reflect→edit→gate (default: examples/toyqa)
+  /skillopt train --bg           run in the background (returns immediately)
+  /skillopt status               background run state + score/step progress
+  /skillopt log                  tail the latest train log
   /skillopt help                 this text
 
 claude -p = your Claude Code subscription (NO metered API cost). pip install skillopt first.
@@ -65,6 +104,8 @@ case "$sub" in
   ckpts)            ckpts ;;
   consume)          consume "$@" ;;
   train)            train "$@" ;;
+  status)           status ;;
+  log)              log_cmd ;;
   help|-h|--help)   usage ;;
   *.md)             consume "$sub" ;;          # bare path → consume
   *)                echo "unknown subverb: $sub"; echo; usage; exit 2 ;;
diff --git a/commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc b/commands/skillopt/examples/toyqa/__pycache__/__init__.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..687f75ed5cf17939891ac52eb974479bb89695c7
GIT binary patch
literal 173
zcmdPq<K<!ig1lOlED-$|M8E(el<}Dj$Y@{)Vo+w#Wb|9fP{ah}e+Eh3^3o42PAw|d
z&&|xs)K5vwOU}&8Nlel&&P+*7PAt+-&d<$F%u6ZOFV4=)$;mG$(NC>N%q_@CE!Ho|
vuPjW|kB<jxEQycTE2zB1VUwGmQks)$SHuc58f1Afi1CS;k&&^88OQ<v*oP|o

literal 0
HcmV?d00001

diff --git a/commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc b/commands/skillopt/examples/toyqa/__pycache__/dataloader.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6c12eac388babd4624dd41217a3fbaf571b0fd95
GIT binary patch
literal 4097
zcmbVPO-vlg6|U~-9%cs2j}87|{nIwsFuO1lFxcMgY7w^A2AjpEu_5cQ9-5f~I@{Ad
z?(TtwT;`BOmNts^;FWw?$x@^UOHt$_ha7z5IMGIm)3Oq2bBKJyVI3>Ce6M<D0NGwg
zDQRB4S6x4^zW3Fun)e#_BpAx?e%bl6L1_OVqgv`iMcVosloHdK&u%ka8kI(5Up_{<
zJQ^8Md}T!S)niPL=!&lDQN2!&r6aK7uoAH9h1CFS53ELEHNk3z)gr9Du=c@f6;>Op
z{jd(CBdMq##Y_j$atJN$;`=+W4#PSktfR1w!P10v9M%a~9m48_brRMo%%u8tZR|MH
zyACkD`!i)A67J5A>B$b3e7AT~Vr<mz&GWqDu4ulqqD>5IIm0(B$H;M4`>u3PvmK3p
z;d9qEEX^xermxML7WYz##H5+yS;OshmTjKXTt3fTZfCh><ctCaX!DM%&8EH2P0Ml$
zzUI4zY0sv#>#noJJ<VU_n&mhJ%`Mtqg4?-X-|0oAId<h+7Asz`jAD*!7aS5}qaSEl
zrvOhEO;5|2u4Z_eq4_*tuneDz1r0c2SP7$u*}Sad8gmxcywFoE=j7po*1gETFnTS+
zUeG+>HM73Pmkg^%OD}QPoL|ujhUfLVn6XFWem0d%C6ZE5Z`iiu8@}n-UQ!Mk&l?`U
z2&l#cR6+c_;b#{o3p^XhW-gHZz94p|$UW-6dUATv@HO~Mo6ausyh(fWR`PR>CA?m6
ztlUr#HEeI0yMa7h#034T9V5=NmN)Dbu>tJZ0d9sy-Y)>9MPrE%1=1PxJ-wRTxdb)<
zyt(ah&6yXAVrO$&7ohGM3L>syW6Z#6r|uL1_~}&NK;Pi|h`;79I~sy&=HVVN%_7dL
z3Vlca!PULhHO^|jnTIduh5~tz)<3h_ciCLxVyV%|Ycx-<#eHHy7HjxmSC&FZl(@`Y
z+EXf!FMb@zyl=IuI_;a#@tmAl%ww%_0|WY3J714Wf!qF0U<T3o^lJCc+1~`rf&*NX
zWlVrE1FJv!pEq*}SY|o)a!JR?9S{Ld0nr(PXdqk7FH~1fp|mvxqr^UAzJxp@-`4yH
zvW0}RDj#50vnBb89)YEh<*T|1Jt}nM8|X2i*XeQS38BaIdOxl=;A}Sr^*5ZAiQ$l)
zS)6l5KPplD;DWT(38loQ*lqdTFBr?A?J%2=7A0NMWh5Y(Qd>HL$)e9>bPW`b^IQ-y
z0g`H&dD9P~F87PB9Vp|D%^xzHk|-jaH++v4I}wbfZ+hJI(s|Q1(>Wkz+Lkewt{lg7
z*2#kjIj@!@(qJ~uGWXKRMRyDgsivY-VI^qDWGZKnXEMRT9s5h|{Bj&idn8KiC6m>6
zo|Cvgxk=SpR#fYqqHznIShf-l(i!a&c6{q7loFe6wy9N=Y0a)8h-;yPJ(R|ivP<i@
z9T3R~;bYBjw|F%1sZ*!iZqz|6lK}`m9B<w2c-ShOZa~Kpd(wLF*YPK9?Y~L5@1bE=
z>=ZO$T4_rYUEdfxB`qUvs!QF1NOpc9P;$s~ZYON_B#BcHNy+U(6$VSV=_cvhyAagL
zYlk2dCAIZAiTkaaR6PKEs6zJ$vT7lK1boCXx;;CX+Y2>ZO?FeH_d|FzstS$jfklO&
zWhXA8N%)|mXNmo_Y2T}8RH7wq9Tt6Fk49Po5)pw!xcw98{MPML%)EVKTG~EH1o0pu
zs%=N`T@;G6*tZ6y#Aa$lnW>>OP-&|mf*T}aaXW}+Zs^0K<A?-dBO*~DOydIgGvdz7
zc;+e(+IPaU`<rk!B+t{Pz4y+1`SWtDecQ{bH~+`M4+)f*qq|MG;R^JQ!`Bc@g|xSE
ziD+Cqk*YfMoI$Kz*re)_C#sD@qA_%u;H;Jy6g<k}Ti-$XVQ34=bY~B3vQi4_43S5a
znYyYr-8xVSN;M?S>}Iqmr&v+GbcC@&bUSF%Q$5vTsHc>jv-i;9sFZQ}>M{7pj<P}>
z#>cDeI*!1UyiJFxy6T!!b-Hx6;#~(;K89Z*YBKOb6+wTf25MmuZ;U_{udXn!e1w^U
zx`MGn4>X+b_hGv~rbZPP(E<-5NVP5nB^e7(8;uZSgXrhWhP&Vel8q^S!}SBn3{(@Z
zC(k34k*az$lrUel%oR;5S0SK?e{f_c17G<pf;@+LO6+Mv)4k}ISMN$snp?k)-Bq9V
zpZ#NG{mQrJuKc<G>bK2ff9k)wHgRwN{kCroJutp$`b$f9xuqM8chz$9*wdzi_m|fe
z)-OKo>{?TQdu3hyJ|Z;_A@RhA{`0R0YxzX-2)I9V4EHFzk|S*b)RDI6Jp91LMlW`m
z-t1`wsUa3}ypG#T8M~Md34Q@H1Q9$AL)bfE3RK@pb&=dCiBj5(5Y-{pBAZk_0^?Dw
zgYl@!6Su_2kn#$KR7;KWvuMFSBudg$7C_>|Y$h}yKZ$MYAetmGP`-9}bZNR{`v6R-
zHR@sa+tf=&#sh_(zwR(*#4V;~KAsvL3wKNON=8E>U4!ltE+HhBNxKT+(TM|hK%S5}
z%^@{t&P+(0!><x&`>Q~V$Wvl}KhpWN`umfQVvi22r9m4oVQKB=Yj_|q!|mHaPEaz!
zRs4)@t!MbLWP)y6+sM;1A(6@Com|l(9S?{*MZ>CmY06~gP1o}+)8@95$+&5Zbq7g&
zKw^x<4TzvElfi-U&1}Z-@qb~i=yMPKDeVbQ!mE>>3@!-`Kaa$(*h?9Yju$apmM@~X
zKwd<V@m@9}`rDxwL&lTwpe~ciIoZ(V`6Av`aM`6)73{D6hf2|d2v?(*nnUGMsKY=}
z3UDV$2rqVf7Q{XbA^6ChMHdM_Zwq2GB1zJ7S(TI*4NU6zC;M=-kwp`C2fk`AAG!E=
zv3&hTS-JT{jfXAgACGKg%ZgrW7}@A9-}tnw+^V%)c>I%%rLr<rYv_A)ZXJ`HuQgmR
z>$l1?vt`BD+*6m3OVQ1}wIwfY#LD_qS(&c&run?GGFfZ5ym7ERxogR5<%v(rKh2bt
c*;=RSqMP;6xLi`d-`ge)eU<t*LnXrXUuJpS!vFvP

literal 0
HcmV?d00001

diff --git a/commands/skillopt/examples/toyqa/config.yaml b/commands/skillopt/examples/toyqa/config.yaml
index c069956..d7cfbc6 100644
--- a/commands/skillopt/examples/toyqa/config.yaml
+++ b/commands/skillopt/examples/toyqa/config.yaml
@@ -13,8 +13,8 @@ model:
 
 train:
   num_epochs: 1
-  train_size: 4
-  batch_size: 2
+  train_size: 6
+  batch_size: 3
   accumulation: 1
   seed: 42
 
@@ -34,7 +34,7 @@ optimizer:
   use_meta_skill: false
 
 evaluation:
-  sel_env_num: 3
+  sel_env_num: 5
   test_env_num: 0
 
 env:
diff --git a/commands/skillopt/examples/toyqa/dataloader.py b/commands/skillopt/examples/toyqa/dataloader.py
index e863c84..5091898 100644
--- a/commands/skillopt/examples/toyqa/dataloader.py
+++ b/commands/skillopt/examples/toyqa/dataloader.py
@@ -8,16 +8,24 @@
 
 from skillopt.datasets.base import BaseDataLoader, BatchSpec
 
+# Format-sensitive questions: an EMPTY skill tends to answer in a full sentence
+# ("The chemical symbol for gold is Au.") → STRICT exact-match on the last line FAILS.
+# That failure is the gradient — the optimizer should learn a "reply with ONLY the
+# value, no sentence" rule, after which the held-out eval score rises (an accepted edit).
 _TRAIN = [
-    {"id": "t1", "question": "What is the capital of France?", "answer": "Paris"},
-    {"id": "t2", "question": "2+2 = ?", "answer": "4"},
-    {"id": "t3", "question": "What color is a clear daytime sky?", "answer": "blue"},
-    {"id": "t4", "question": "Who wrote Romeo and Juliet? Surname.", "answer": "Shakespeare"},
+    {"id": "t1", "question": "What is the chemical symbol for gold?", "answer": "Au"},
+    {"id": "t2", "question": "How many sides does a hexagon have?", "answer": "6"},
+    {"id": "t3", "question": "What is the past tense of the verb 'run'?", "answer": "ran"},
+    {"id": "t4", "question": "Round 3.14159 to two decimal places.", "answer": "3.14"},
+    {"id": "t5", "question": "What is 7 times 8?", "answer": "56"},
+    {"id": "t6", "question": "Give the ISO two-letter country code for Germany.", "answer": "DE"},
 ]
 _EVAL = [
-    {"id": "e1", "question": "What is the capital of Japan?", "answer": "Tokyo"},
-    {"id": "e2", "question": "3+5 = ?", "answer": "8"},
-    {"id": "e3", "question": "Largest planet in our solar system?", "answer": "Jupiter"},
+    {"id": "e1", "question": "What is the chemical symbol for sodium?", "answer": "Na"},
+    {"id": "e2", "question": "How many sides does a pentagon have?", "answer": "5"},
+    {"id": "e3", "question": "What is the past tense of the verb 'go'?", "answer": "went"},
+    {"id": "e4", "question": "Give the ISO two-letter country code for France.", "answer": "FR"},
+    {"id": "e5", "question": "What is 9 times 6?", "answer": "54"},
 ]