From 3491ddde97e8bda2fa87e48a59f62ff272211657 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:25:06 -0400 Subject: [PATCH 01/14] Reorganize skill references and add model-class utils heuristic --- skills/pyem-model-generator/README.md | 53 +++++ skills/pyem-model-generator/SKILL.md | 199 +++++++++++++++++ .../references/bayes.json | 35 +++ .../references/example-notebook-template.json | 81 +++++++ .../pyem-model-generator/references/glm.json | 40 ++++ .../references/model-file-template.py | 91 ++++++++ .../references/modelclass-utils-template.py | 112 ++++++++++ .../references/parameter-recovery-notebook.md | 57 +++++ .../references/pyem-runtime-contract.md | 73 +++++++ .../pyem-model-generator/references/rl.json | 30 +++ skills/pyem-model-generator/template.json | 200 ++++++++++++++++++ 11 files changed, 971 insertions(+) create mode 100644 skills/pyem-model-generator/README.md create mode 100644 skills/pyem-model-generator/SKILL.md create mode 100644 skills/pyem-model-generator/references/bayes.json create mode 100644 skills/pyem-model-generator/references/example-notebook-template.json create mode 100644 skills/pyem-model-generator/references/glm.json create mode 100644 skills/pyem-model-generator/references/model-file-template.py create mode 100644 skills/pyem-model-generator/references/modelclass-utils-template.py create mode 100644 skills/pyem-model-generator/references/parameter-recovery-notebook.md create mode 100644 skills/pyem-model-generator/references/pyem-runtime-contract.md create mode 100644 skills/pyem-model-generator/references/rl.json create mode 100644 skills/pyem-model-generator/template.json diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md new file mode 100644 index 0000000..8533949 --- /dev/null +++ b/skills/pyem-model-generator/README.md @@ -0,0 +1,53 @@ +# pyem-model-generator skill + +Use this skill to scaffold new computational 
cognitive models for pyEM that are not in base `pyem`, including from free-text task/model descriptions with equations. + +## What it generates + +- `pyem/models/{model_class}.py` + - `{model_name}_sim(params, nblocks, ntrials, **kwargs)` + - `{model_name}_fit(params, *, prior=None, output="npl", **kwargs)` +- `examples/{model_class}.ipynb` + - model/task description + - simulation and fit demo + - parameter recovery plot (like `examples/rl.ipynb`) + +## Included templates + +- `template.json` (simple main template) +- `references/rl.json`, `references/bayes.json`, `references/glm.json` (reference anchors) +- `references/example-notebook-template.json` (parameter recovery notebook template) + +## How to use + +1. Copy and fill `template.json`. +2. Provide it to the skill in your prompt. +3. Answer follow-up questions if anything is missing. +4. Ask the skill to generate the model module and notebook. + + +## Offline resources + +If the runtime does not include full `pyem` source files, use: + +- `references/pyem-runtime-contract.md` +- `references/parameter-recovery-notebook.md` + +These provide enough contract detail to generate pyEM-compatible sim/fit functions and notebook recovery plots. + + +## Free-text description support + +If you provide prose + equations instead of a filled template, the skill will parse text into `description_input.extracted_spec`. +Use `description_examples.social_signals` in `template.json` as a worked example of this conversion. + + +## Model class utility layout + +Generated model classes should include a shared utility module `pyem/models/{model_class}_utils.py` and one or more model files `pyem/models/{model_name}.py` that import shared helpers: + +```python +from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +``` + +Use `references/modelclass-utils-template.py` and `references/model-file-template.py` as starting points. 
diff --git a/skills/pyem-model-generator/SKILL.md b/skills/pyem-model-generator/SKILL.md new file mode 100644 index 0000000..c8c1c67 --- /dev/null +++ b/skills/pyem-model-generator/SKILL.md @@ -0,0 +1,199 @@ +--- +name: pyem-model-generator +description: Generate new computational cognitive model modules for pyEM and matching example notebooks. Use when asked to add a model not included in base pyEM, scaffold `pyem.models.{modelclass}.py` with `{modelname}_sim(params, nblocks, ntrials, **kwargs)` and `{modelname}_fit(params, *, prior=None, output="npl")`, and produce `examples/{modelclass}.ipynb`. Trigger this skill when the user wants pyEM-style imports, parameter transformations (e.g., `norm2alpha`, `norm2beta`), output dictionaries, model variants, or RL/Bayes/GLM-aligned structure. +--- + +# pyem-model-generator + +Generate pyEM-compatible model code using patterns in: + +- `pyem/models/rl.py` +- `pyem/models/bayes.py` +- `pyem/models/glm.py` + +## Offline/resource mode + +When full `pyem` package files are unavailable, load: + +- `references/pyem-runtime-contract.md` for utility and fit contracts. +- `references/parameter-recovery-notebook.md` for notebook structure and plotting requirements. +- `references/example-notebook-template.json` for a ready-to-fill notebook cell template. + +In offline mode, follow these references instead of guessing utility behavior. + + +## Mandatory clarification behavior + +If any required information is missing or ambiguous, ask the user concise follow-up questions before generating code. + +Required items to confirm: + +1. `model_class`, `model_name`, and target module path. +2. Simulation task inputs (at minimum `nblocks` and `ntrials`; plus task-specific arrays). +3. Parameter list with transform/bounds and semantic role. +4. Sim output keys and fit output modes. +5. Variant definitions (if requested). 
+ + +## Converting free-text model descriptions into code + +When the user provides prose/equations instead of a filled template: + +1. Copy the text into `template.description_input.raw_text`. +2. Extract structured fields into `template.description_input.extracted_spec`: + - task flow (stimulus, choice set, outcomes, feedback), + - tensor shapes (subject/block/trial/option), + - latent state names (`Q_self`, `Q_other`, etc.), + - update equations, + - choice policy equation(s), + - variant catalog and parameter toggles. +3. Normalize equation variables into valid Python names and map them to parameter definitions. +4. If any mapping is ambiguous (for example sign conventions or variant naming), ask targeted follow-up questions before code generation. +5. Generate sim/fit using the extracted spec and preserve the user's intended equations exactly. + +### Example: social signals description + +For text like a “social signals task” with three options (A/B/C), dual value tracks (`Q_self`, `Q_other`), and policy variants, produce: + +- arrays shaped `(nsubjects, nblocks, ntrials, 3)` for option-level values/probabilities, +- update rules for `Q_self` and `Q_other` with separate learning-rate/valence parameters where requested, +- base policy `softmax(beta * (w_self * Q_self + w_other * Q_other))`, +- arbitration variants `p = (1-omega) * softmax(beta * Q_self) + omega * softmax(beta * Q_other)`, +- variant names tracked in template `variants.variant_names` and parsed parameter switches in `description_input.extracted_spec.variant_rules`. 
+ +## pyEM import and function format (pseudo code) + +```python +import numpy as np +from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval + + +def {model_name}_sim( + params: np.ndarray, + nblocks: int = 4, + ntrials: int = 12, + **kwargs, +) -> dict: + """Simulate behavior for one model family.""" + n_subjects = params.shape[0] + rng = np.random.default_rng(kwargs.get("seed", None)) + + beta = params[:, 0] # natural-space for simulation + alpha = params[:, 1] # natural-space for simulation + + choices = np.empty((n_subjects, nblocks, ntrials), dtype=object) + rewards = np.zeros((n_subjects, nblocks, ntrials), dtype=float) + ev = np.zeros((n_subjects, nblocks, ntrials + 1, 2), dtype=float) + pe = np.zeros((n_subjects, nblocks, ntrials), dtype=float) + nll = np.zeros((n_subjects, nblocks, ntrials), dtype=float) + + for s in range(n_subjects): + for b in range(nblocks): + ev[s, b, 0, :] = 0.5 + for t in range(ntrials): + p = softmax(ev[s, b, t, :], beta[s]) + c = rng.choice([0, 1], p=p) + r = 0.0 # replace with task-specific outcome logic + pe[s, b, t] = r - ev[s, b, t, c] + ev[s, b, t + 1, :] = ev[s, b, t, :] + ev[s, b, t + 1, c] = ev[s, b, t, c] + alpha[s] * pe[s, b, t] + nll[s, b, t] = -np.log(p[c] + 1e-12) + + return { + "params": params, + "choices": choices, + "rewards": rewards, + "EV": ev, + "PE": pe, + "nll": nll, + } + + +def {model_name}_fit( + params, + *, + prior=None, + output: str = "npl", + **kwargs, +): + """Compute fit objective compatible with pyEM.""" + beta = float(norm2beta(params[0])) + alpha = float(norm2alpha(params[1])) + + if not (1e-5 <= beta <= 20.0): + return 1e7 + if not (0.0 <= alpha <= 1.0): + return 1e7 + + choices = kwargs["choices"] + rewards = kwargs["rewards"] + nblocks, ntrials = rewards.shape + + ev = np.zeros((nblocks, ntrials + 1, 2), dtype=float) + pe = np.zeros((nblocks, ntrials), dtype=float) + nll = 0.0 + + for b in range(nblocks): + ev[b, 0, :] = 0.5 + for t in range(ntrials): + c = 0 if choices[b, t] == 
"A" else 1 + p = softmax(ev[b, t, :], beta) + r = rewards[b, t] + pe[b, t] = r - ev[b, t, c] + ev[b, t + 1, :] = ev[b, t, :] + ev[b, t + 1, c] = ev[b, t, c] + alpha * pe[b, t] + nll += -np.log(p[c] + 1e-12) + + if output == "all": + return {"params": np.array([beta, alpha]), "EV": ev, "PE": pe, "nll": nll} + + return calc_fval(nll, params, prior=prior, output=output) +``` + + +## Model-class utility heuristic + +Prefer a per-class shared utility file so models in the same class reuse scaffolding: + +- Generate `pyem/models/{model_class}_utils.py` using `references/modelclass-utils-template.py`. +- Generate each model variant file as `pyem/models/{model_name}.py` using `references/model-file-template.py`. +- In each model file, import shared helpers with: + +```python +from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +``` + +Expected sharing points across model files: + +- `_alloc_sim` / `_alloc_fit` for tensor allocation. +- `ModelSpec` for registry metadata. +- `spec_to_id` for deterministic model IDs. +- `build_params` and parameter registry for consistent initialization/transforms. + +When users provide custom helper code, preserve the same API names so all models in a class remain interoperable. + +## Generation workflow + +1. Load `template.json` and, if package context is missing, load relevant files under `references/`. +2. If the user supplied prose/equations, parse them into `description_input.extracted_spec`; if fields remain missing, ask follow-up questions and wait for answers. +3. Generate `pyem/models/{model_class}_utils.py` first, then generate one or more `pyem/models/{model_name}.py` files that import shared helpers from that utils module. +4. 
Generate `examples/{model_class}.ipynb` from `references/example-notebook-template.json` and align section order to `examples/rl.ipynb`, `examples/bayes.ipynb`, and `examples/glm.ipynb` conventions: + - model/task description, + - simulation demo, + - fit simulated behavior via `EMModel.recover`, + - parameter recovery plot with identity line and correlation per parameter. +5. Run smoke checks: import module, run sim, run fit with `output="npl"`. + +## Optional reference alignment + +If needed, consult `references/rl.json`, `references/bayes.json`, and `references/glm.json` to mirror existing style and output contracts. + +## Notebook generation without repository access + +When no local `examples/` notebooks are accessible: + +1. Load `references/parameter-recovery-notebook.md`. +2. Load `references/example-notebook-template.json`. +3. Fill placeholders and write a valid `.ipynb` (nbformat 4). +4. Ensure imports include `EMModel` and the generated sim/fit functions. +5. Ensure final cells run simulation, fitting, and recovery plotting end-to-end. 
diff --git a/skills/pyem-model-generator/references/bayes.json b/skills/pyem-model-generator/references/bayes.json new file mode 100644 index 0000000..10c7593 --- /dev/null +++ b/skills/pyem-model-generator/references/bayes.json @@ -0,0 +1,35 @@ +{ + "source": "pyem/models/bayes.py", + "module": "pyem.models.bayes", + "imports": [ + "import numpy as np", + "from ..utils.math import norm2alpha, calc_fval" + ], + "helpers": [ + { + "name": "_generate_fishp", + "signature": "_generate_fishp(lambda1: float, n_fish: int) -> np.ndarray" + } + ], + "functions": [ + { + "name": "bayes_sim", + "kind": "sim", + "signature": "bayes_sim(params: np.ndarray, nblocks: int = 10, ntrials: int = 15, n_fish: int = 3) -> dict", + "param_space": "natural", + "state_keys": ["choices", "observations", "probabilities", "ponds"], + "output_keys": ["params", "choices", "observations", "probabilities", "ponds"] + }, + { + "name": "bayes_fit", + "kind": "fit", + "signature": "bayes_fit(params, choices, observations, prior=None, output: str = 'npl')", + "transform_map": [ + {"index": 0, "name": "lambda1", "transform": "norm2alpha", "bounds": [0.0, 1.0]} + ], + "output_modes": ["npl", "nll", "all"], + "all_output_keys": ["params", "nll"], + "objective_call": "calc_fval(nll, params, prior=prior, output=output)" + } + ] +} diff --git a/skills/pyem-model-generator/references/example-notebook-template.json b/skills/pyem-model-generator/references/example-notebook-template.json new file mode 100644 index 0000000..c5c6639 --- /dev/null +++ b/skills/pyem-model-generator/references/example-notebook-template.json @@ -0,0 +1,81 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cell_templates": [ + { + "cell_type": "markdown", + "source": [ + "# {model_title}\\n", + "\\n", + "## {task_title}\\n", + "This notebook demonstrates simulation, fitting, 
and parameter recovery." + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\\n", + "import matplotlib.pyplot as plt\\n", + "from pyem.api import EMModel\\n", + "from pyem.models.{model_class} import {model_name}_sim, {model_name}_fit" + ] + }, + { + "cell_type": "code", + "source": [ + "rng = np.random.default_rng({random_seed})\\n", + "nsubjects = {nsubjects}\\n", + "nblocks = {nblocks}\\n", + "ntrials = {ntrials}\\n", + "true_params = np.column_stack([\\n", + " rng.uniform({p1_low}, {p1_high}, nsubjects),\\n", + " rng.uniform({p2_low}, {p2_high}, nsubjects),\\n", + "])" + ] + }, + { + "cell_type": "code", + "source": [ + "sim = {model_name}_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", + "sim.keys()" + ] + }, + { + "cell_type": "code", + "source": [ + "em = EMModel({model_name}_fit, prior='laplace')\\n", + "recovery = em.recover(sim, {model_name}_fit, n_jobs=1)\\n", + "recovery.keys()" + ] + }, + { + "cell_type": "code", + "source": [ + "fitted = np.asarray(recovery['mfit'])\\n", + "param_names = {param_names}\\n", + "n_params = true_params.shape[1]\\n", + "fig, axes = plt.subplots(1, n_params, figsize=(4 * n_params, 4))\\n", + "for i, ax in enumerate(np.atleast_1d(axes)):\\n", + " ax.scatter(true_params[:, i], fitted[:, i], alpha=0.7)\\n", + " lo = min(true_params[:, i].min(), fitted[:, i].min())\\n", + " hi = max(true_params[:, i].max(), fitted[:, i].max())\\n", + " ax.plot([lo, hi], [lo, hi], 'k--', linewidth=1)\\n", + " r = np.corrcoef(true_params[:, i], fitted[:, i])[0, 1]\\n", + " ax.set_title(f\"{param_names[i]} (r={r:.2f})\")\\n", + " ax.set_xlabel('True')\\n", + " ax.set_ylabel('Recovered')\\n", + "plt.tight_layout()" + ] + } + ] +} diff --git a/skills/pyem-model-generator/references/glm.json b/skills/pyem-model-generator/references/glm.json new file mode 100644 index 0000000..8e4113e --- /dev/null +++ b/skills/pyem-model-generator/references/glm.json @@ -0,0 +1,40 @@ +{ + "source": "pyem/models/glm.py", + "module": 
"pyem.models.glm", + "imports": [ + "import numpy as np", + "from scipy.stats import norm", + "from scipy.special import expit", + "from ..utils.math import norm2alpha, calc_fval" + ], + "functions": [ + { + "name": "glm_sim", + "kind": "sim", + "signature": "glm_sim(params: np.ndarray, ntrials: int = 100)", + "param_space": "natural", + "output_shape": "tuple", + "output_keys": ["X", "Y"] + }, + { + "name": "glm_fit", + "kind": "fit", + "signature": "glm_fit(params, X, Y, prior=None, output: str = 'npl')", + "transform_map": [], + "output_modes": ["npl", "nll", "all"], + "all_output_keys": ["params", "predicted_y", "negll", "BIC"], + "objective_call": "calc_fval(negll, params, prior=prior, output=output)" + }, + { + "name": "glm_decay_fit", + "kind": "fit", + "signature": "glm_decay_fit(params, X, Y, prior=None, output: str = 'npl', decay: str = 'twostep')", + "transform_map": [ + {"index": -1, "name": "gamma", "transform": "norm2alpha", "bounds": [0.0, 1.0]} + ], + "output_modes": ["npl", "nll", "all"], + "all_output_keys": ["params", "predicted_y", "nll", "BIC"], + "objective_call": "calc_fval(negll, params, prior=prior, output=output)" + } + ] +} diff --git a/skills/pyem-model-generator/references/model-file-template.py b/skills/pyem-model-generator/references/model-file-template.py new file mode 100644 index 0000000..92cec37 --- /dev/null +++ b/skills/pyem-model-generator/references/model-file-template.py @@ -0,0 +1,91 @@ +"""Template for one generated model module within a model class.""" + +from __future__ import annotations + +import numpy as np + +from pyem.utils.math import calc_fval, norm2alpha, norm2beta, softmax +from .model_class_utils import ( + ModelSpec, + _alloc_fit, + _alloc_sim, + build_params, + spec_to_id, +) + + +mod_desc = """Replace with concise model description.""" +mod_spec = {"rl": {"softmax": ["beta"], "rw": ["alpha"]}} +mod_id = spec_to_id(mod_spec) + + +def mod_params(nsubj: int, rng: np.random.Generator | None = None): + """Generate 
parameter names, transforms, and true parameters.""" + return build_params(["beta", "alpha"], nsubj, rng) + + +def mod_sim(params: np.ndarray, nblocks: int = 4, ntrials: int = 12, **kwargs): + """Simulate behavior for this model variant.""" + nsubj = params.shape[0] + dat = _alloc_sim(nsubj, nblocks, ntrials, nchoices=2) + rng = np.random.default_rng(kwargs.get("seed", None)) + + beta = params[:, 0] + alpha = params[:, 1] + + for s in range(nsubj): + for b in range(nblocks): + dat["ev"][s, b, 0, :] = 0.5 + for t in range(ntrials): + p = softmax(dat["ev"][s, b, t, :], beta[s]) + c = rng.choice([0, 1], p=p) + r = float(rng.integers(0, 2)) + dat["choices"][s, b, t] = "A" if c == 0 else "B" + dat["ch_prob"][s, b, t, :] = p + dat["pe"][s, b, t] = r - dat["ev"][s, b, t, c] + dat["ev"][s, b, t + 1, :] = dat["ev"][s, b, t, :] + dat["ev"][s, b, t + 1, c] = ( + dat["ev"][s, b, t, c] + alpha[s] * dat["pe"][s, b, t] + ) + dat["nll"][s] += -np.log(p[c] + 1e-12) + + dat["params"] = params + return dat + + +def mod_fit(params, choices, rewards, prior=None, output="npl"): + """Fit objective (npl/nll) with optional diagnostics.""" + beta = float(norm2beta(params[0])) + alpha = float(norm2alpha(params[1])) + + if not (1e-5 <= beta <= 20.0) or not (0.0 <= alpha <= 1.0): + return 1e7 + + nblocks, ntrials = rewards.shape + dat = _alloc_fit(nblocks, ntrials, nchoices=2) + + for b in range(nblocks): + dat["ev"][b, 0, :] = 0.5 + for t in range(ntrials): + c = 0 if choices[b, t] == "A" else 1 + p = softmax(dat["ev"][b, t, :], beta) + r = rewards[b, t] + dat["ch_prob"][b, t, :] = p + dat["pe"][b, t] = r - dat["ev"][b, t, c] + dat["ev"][b, t + 1, :] = dat["ev"][b, t, :] + dat["ev"][b, t + 1, c] = dat["ev"][b, t, c] + alpha * dat["pe"][b, t] + dat["nll"] += -np.log(p[c] + 1e-12) + + if output == "all": + return {"params": [beta, alpha], **dat} + return calc_fval(dat["nll"], params, prior=prior, output=output) + + +MODEL = ModelSpec( + id=mod_id, + spec=mod_spec, + desc=mod_desc, + 
params=mod_params, + sim=mod_sim, + fit=mod_fit, +) diff --git a/skills/pyem-model-generator/references/modelclass-utils-template.py b/skills/pyem-model-generator/references/modelclass-utils-template.py new file mode 100644 index 0000000..a5a1b00 --- /dev/null +++ b/skills/pyem-model-generator/references/modelclass-utils-template.py @@ -0,0 +1,112 @@ +"""Shared utilities for models in one model class. + +Copy this template to `pyem/models/{model_class}_utils.py` and customize. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Callable, Dict, Sequence + +import numpy as np +from pyem.utils.math import norm2alpha, norm2beta + + +@dataclass(frozen=True) +class ModelSpec: + """Container for a generated model implementation.""" + + id: str + spec: dict + desc: str + params: Callable + sim: Callable + fit: Callable + + +def spec_to_id(spec: dict) -> str: + """Convert a spec dictionary into a deterministic model ID string.""" + block_order = ["rl", "cr", "link"] + op_alias = {"linear": "lin"} + + blocks = [] + for block in block_order: + if block not in spec or not spec[block]: + continue + ops = spec[block] + op_strs = [] + for op_name in sorted(ops.keys()): + args = ops[op_name] + name = op_alias.get(op_name, op_name) + if isinstance(args, dict): + for subop in sorted(args.keys()): + subargs = args[subop] + if not isinstance(subargs, (list, tuple)): + raise ValueError( + f"Arguments for {block}:{op_name}:{subop} must be a list" + ) + op_strs.append(f"{name}.{subop}({','.join(subargs)})") + elif isinstance(args, (list, tuple)): + op_strs.append(f"{name}({','.join(args)})") + else: + raise ValueError( + f"Arguments for {block}:{op_name} must be a list or dict" + ) + blocks.append(f"{block}={'/'.join(op_strs)}") + return "|".join(blocks) + + +def _alloc_sim(nsubj: int, nblocks: int, ntrials: int, nchoices: int = 2) -> Dict[str, np.ndarray]: + """Allocate common simulation arrays.""" + return { + "choices": np.zeros((nsubj, 
nblocks, ntrials), dtype=object), + "ev": np.zeros((nsubj, nblocks, ntrials + 1, nchoices), dtype=float), + "ch_prob": np.zeros((nsubj, nblocks, ntrials, nchoices), dtype=float), + "pe": np.zeros((nsubj, nblocks, ntrials), dtype=float), + "nll": np.zeros((nsubj,), dtype=float), + } + + +def _alloc_fit(nblocks: int, ntrials: int, nchoices: int = 2) -> Dict[str, np.ndarray]: + """Allocate common fitting arrays.""" + return { + "ev": np.zeros((nblocks, ntrials + 1, nchoices), dtype=float), + "ch_prob": np.zeros((nblocks, ntrials, nchoices), dtype=float), + "pe": np.zeros((nblocks, ntrials), dtype=float), + "nll": 0.0, + } + + +@dataclass(frozen=True) +class ParamDef: + """Definition for one model parameter in the registry.""" + + name: str + xform: Callable + init_fn: Callable + + +PARAM_REGISTRY = { + "beta": ParamDef("beta", norm2beta, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), + "alpha": ParamDef("alpha", norm2alpha, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), +} + + +def build_params( + param_names: Sequence[str], + nsubj: int, + rng: np.random.Generator | None = None, +) -> tuple[list[str], list[Callable], np.ndarray]: + """Build transformed parameter metadata and sampled true params.""" + if rng is None: + rng = np.random.default_rng() + + true_params = np.zeros((nsubj, len(param_names)), dtype=float) + param_xform: list[Callable] = [] + + for i, name in enumerate(param_names): + p = PARAM_REGISTRY[name] + param_xform.append(p.xform) + true_params[:, i] = p.init_fn(rng, nsubj) + + return list(param_names), param_xform, true_params diff --git a/skills/pyem-model-generator/references/parameter-recovery-notebook.md b/skills/pyem-model-generator/references/parameter-recovery-notebook.md new file mode 100644 index 0000000..753b77a --- /dev/null +++ b/skills/pyem-model-generator/references/parameter-recovery-notebook.md @@ -0,0 +1,57 @@ +# Parameter recovery notebook pattern + +Use this reference to implement `examples/{model_class}.ipynb` even when base 
example notebooks are unavailable. + +This pattern is distilled from the repository notebooks under `examples/` (`rl.ipynb`, `bayes.ipynb`, `glm.ipynb`): + +- Intro markdown title + task subtitle. +- Import block (`numpy`, plotting, model sim/fit, `EMModel`). +- Simulation setup cell. +- Simulation execution cell. +- Fit-and-recover cell using `EMModel.recover(...)`. +- Parameter recovery scatter plots with identity lines. + +## Required sections + +1. Model and task overview. +2. Parameter specification (true generating parameters). +3. Simulation run. +4. Fit simulated behavior. +5. Parameter recovery plot. +6. Brief interpretation. + +## Template source + +Use `references/example-notebook-template.json` as the base cell template. Replace all placeholders (for example `{model_name}`, `{model_class}`, bounds, and parameter names). + +## Minimal recovery workflow + +1. Choose `N` synthetic subjects (e.g., `N=50`). +2. Sample true parameters in natural space. +3. Run `{model_name}_sim` to generate behavior. +4. Fit each synthetic subject with `{model_name}_fit` via `EMModel.recover`. +5. Compare true vs recovered parameters. + +## Plot requirements + +- One subplot per parameter. +- X-axis: true values. +- Y-axis: recovered values. +- Add identity line `y=x`. +- Report Pearson correlation `r` in each panel title. 
+ +## Minimal plotting snippet + +```python +fig, axes = plt.subplots(1, n_params, figsize=(4 * n_params, 4)) +for i, ax in enumerate(np.atleast_1d(axes)): + ax.scatter(true_params[:, i], recovered_params[:, i], alpha=0.7) + lo = min(true_params[:, i].min(), recovered_params[:, i].min()) + hi = max(true_params[:, i].max(), recovered_params[:, i].max()) + ax.plot([lo, hi], [lo, hi], "k--", linewidth=1) + r = np.corrcoef(true_params[:, i], recovered_params[:, i])[0, 1] + ax.set_title(f"{param_names[i]} (r={r:.2f})") + ax.set_xlabel("True") + ax.set_ylabel("Recovered") +plt.tight_layout() +``` diff --git a/skills/pyem-model-generator/references/pyem-runtime-contract.md b/skills/pyem-model-generator/references/pyem-runtime-contract.md new file mode 100644 index 0000000..bcce0e5 --- /dev/null +++ b/skills/pyem-model-generator/references/pyem-runtime-contract.md @@ -0,0 +1,73 @@ +# pyEM runtime contract (offline reference) + +Use this file when the full `pyem` package is unavailable. It defines minimal contracts needed to generate compatible model modules. + +## Expected utility imports + +Preferred import in generated model files: + +```python +from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval +``` + +## Utility behavior + +### `softmax(values, beta)` + +- Inputs: + - `values`: 1D array-like action values. + - `beta`: inverse temperature (`> 0`). +- Output: + - Probability vector matching `values` length. +- Stable form: + +```python +z = beta * (values - np.max(values)) +exp_z = np.exp(z) +p = exp_z / np.sum(exp_z) +``` + +### `norm2alpha(x)` + +- Maps unconstrained real `x` to `(0, 1)`. +- Logistic form is acceptable: + +```python +alpha = 1.0 / (1.0 + np.exp(-x)) +``` + +### `norm2beta(x)` + +- Maps unconstrained real `x` to `(1e-5, 20]`. +- Compatible bounded-sigmoid form: + +```python +beta = 1e-5 + (20.0 - 1e-5) / (1.0 + np.exp(-x)) +``` + +### `calc_fval(nll, params, prior=None, output="npl")` + +- `output="nll"`: return `nll`. 
+- `output="npl"`: return `nll - log_prior(params)` if prior exists; else `nll`. +- `output="all"`: typically handled by caller model function. + +## Prior contract + +Use a lightweight prior dictionary that can be passed through unchanged to pyEM: + +```python +prior = { + "mu": np.array([...]), + "sigma": np.array([...]), +} +``` + +If prior shape mismatches params, return a large penalty value (commonly `1e7`). + +## Fit function contract + +- Signature pattern: + - `{model_name}_fit(params, *, prior=None, output="npl", **kwargs)` +- Must support at least `output in {"npl", "nll", "all"}`. +- Must return scalar for `"npl"`/`"nll"`. +- For invalid transformed params, return `1e7`. diff --git a/skills/pyem-model-generator/references/rl.json b/skills/pyem-model-generator/references/rl.json new file mode 100644 index 0000000..94decd5 --- /dev/null +++ b/skills/pyem-model-generator/references/rl.json @@ -0,0 +1,30 @@ +{ + "source": "pyem/models/rl.py", + "module": "pyem.models.rl", + "imports": [ + "import numpy as np", + "from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval" + ], + "functions": [ + { + "name": "rw1a1b_sim", + "kind": "sim", + "signature": "rw1a1b_sim(params: np.ndarray, nblocks: int = 3, ntrials: int = 24, outcomes: np.ndarray | None = None)", + "param_space": "natural", + "state_keys": ["choices", "rewards", "EV", "ch_prob", "choices_A", "PE", "nll"], + "output_keys": ["params", "choices", "rewards", "EV", "ch_prob", "choices_A", "PE", "nll"] + }, + { + "name": "rw1a1b_fit", + "kind": "fit", + "signature": "rw1a1b_fit(params, choices, rewards, prior=None, output=\"npl\")", + "transform_map": [ + {"index": 0, "name": "beta", "transform": "norm2beta", "bounds": [1e-05, 20.0]}, + {"index": 1, "name": "alpha", "transform": "norm2alpha", "bounds": [0.0, 1.0]} + ], + "output_modes": ["npl", "nll", "all"], + "all_output_keys": ["params", "choices", "choices_A", "rewards", "EV", "PE", "nll"], + "objective_call": "calc_fval(nll, params, 
prior=prior, output=output)" + } + ] +} diff --git a/skills/pyem-model-generator/template.json b/skills/pyem-model-generator/template.json new file mode 100644 index 0000000..3b60c7a --- /dev/null +++ b/skills/pyem-model-generator/template.json @@ -0,0 +1,200 @@ +{ + "model_class": "", + "model_name": "", + "module_path": "pyem/models/{model_class}.py", + "imports": [ + "import numpy as np", + "from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval" + ], + "parameters": [ + { + "name": "beta", + "index": 0, + "description": "Inverse temperature", + "sim_space": "natural", + "fit_space": "normalized", + "transform": "norm2beta", + "bounds": [ + 1e-05, + 20.0 + ] + }, + { + "name": "alpha", + "index": 1, + "description": "Learning rate", + "sim_space": "natural", + "fit_space": "normalized", + "transform": "norm2alpha", + "bounds": [ + 0.0, + 1.0 + ] + } + ], + "sim": { + "signature": "{model_name}_sim(params, nblocks, ntrials, **kwargs)", + "inputs": { + "nblocks": 4, + "ntrials": 12, + "custom": [ + { + "name": "seed", + "type": "int | None", + "required": false, + "description": "Seed for np.random.default_rng." + }, + { + "name": "outcomes", + "type": "np.ndarray | None", + "required": false, + "description": "Optional externally supplied outcomes." 
+ } + ] + }, + "state_arrays": [ + "choices", + "rewards", + "EV", + "PE", + "nll" + ], + "output_keys": [ + "params", + "choices", + "rewards", + "EV", + "PE", + "nll" + ] + }, + "fit": { + "signature": "{model_name}_fit(params, *, prior=None, output=\"npl\", **kwargs)", + "required_inputs": [ + "choices", + "rewards" + ], + "output_modes": [ + "npl", + "nll", + "all" + ], + "all_output_keys": [ + "params", + "nll" + ], + "failure_return": 10000000.0, + "objective": "calc_fval(nll, params, prior=prior, output=output)" + }, + "variants": { + "requested": false, + "variant_names": [], + "differences": "" + }, + "notebook": { + "path": "examples/{model_class}.ipynb", + "include_parameter_recovery_plot": true, + "sections": [ + "Model and task overview", + "Parameter specification", + "Simulation example", + "Fit simulated behavior", + "Parameter recovery plot", + "Result summary" + ], + "random_seed": 123, + "template_file": "references/example-notebook-template.json", + "pattern_source": "examples/rl.ipynb, examples/bayes.ipynb, examples/glm.ipynb", + "nsubjects": 50, + "param_recovery": { + "use_emmodel_recover": true, + "plot_identity_line": true, + "plot_corr_in_title": true + } + }, + "description_input": { + "raw_text": "", + "extracted_spec": { + "task_name": "", + "task_flow": [], + "choice_set": [ + "A", + "B" + ], + "nchoices": 2, + "state_tensors": [ + { + "name": "Q_self", + "shape": "(nsubjects, nblocks, ntrials+1, nchoices)" + }, + { + "name": "Q_other", + "shape": "(nsubjects, nblocks, ntrials+1, nchoices)" + } + ], + "update_equations": [], + "choice_rule": "", + "variant_rules": [] + } + }, + "description_examples": { + "social_signals": { + "task_name": "social signals task", + "nsubjects": 100, + "sim_inputs": { + "nblocks": 4, + "ntrials": 12, + "nchoices": 3 + }, + "task_flow": [ + "show three options A/B/C", + "store choice in (subject, block, trial) array", + "observe clear signal outcome_self in {0,1}", + "observe partner social feedback 
mapped to outcome_other" + ], + "state_tensors": [ + { + "name": "Q_self", + "shape": "(nsubjects, nblocks, ntrials+1, 3)" + }, + { + "name": "Q_other", + "shape": "(nsubjects, nblocks, ntrials+1, 3)" + } + ], + "update_equations": [ + "Q_self[s,b,t+1,c] = Q_self[s,b,t,c] + alpha_self * (outcome_self[s,b,t] - Q_self[s,b,t,c])", + "Q_other[s,b,t+1,c] = Q_other[s,b,t,c] + alpha_other * (outcome_other[s,b,t] - Q_other[s,b,t,c])" + ], + "choice_rule": "p(choice) = softmax(beta * (w_self*Q_self[s,b,t,c] + w_other*Q_other[s,b,t,c]))", + "outcome_other_definition": "social_sensitivity_pos*1 if positive else social_sensitivity_neg*-1", + "variant_rules": [ + "1b2w2a: alpha_self, alpha_other with valence-sensitive theta terms", + "1b2w2a2t: alpha_self_pos, alpha_self_neg, alpha_other_pos, alpha_other_neg", + "1b2w2a4t: separate self and other sensitivity for pos/neg outcomes", + "1b1o2a4t/1b1o1a4t/1b1o2a2t/1b1o2a: arbitration p=(1-omega)*softmax(beta*Q_self)+omega*softmax(beta*Q_other)" + ] + } + }, + "scaffold_layout": { + "model_utils_path": "pyem/models/{model_class}_utils.py", + "model_files": [ + "pyem/models/{model_name}.py" + ], + "shared_import": "from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "required_shared_symbols": [ + "_alloc_sim", + "_alloc_fit", + "ModelSpec", + "spec_to_id", + "build_params" + ], + "utils_template": "references/modelclass-utils-template.py", + "model_template": "references/model-file-template.py" + }, + "reference_specs": [ + "references/rl.json", + "references/bayes.json", + "references/glm.json" + ] +} From 2318bf1e93c6cbab61d95b9fea699943b7d02bb3 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:43:39 -0400 Subject: [PATCH 02/14] Refactor skill around modclass utils and standalone reference templates --- skills/pyem-model-generator/README.md | 63 ++--- skills/pyem-model-generator/SKILL.md | 229 ++++++------------ 
.../references/bayes.json | 55 ++--- .../pyem-model-generator/references/glm.json | 59 ++--- .../references/model-file-template.py | 9 +- .../references/modelclass-utils-template.py | 56 ++++- .../references/parameter-recovery-notebook.md | 2 +- .../pyem-model-generator/references/rl.json | 50 ++-- .../references/template.json | 67 +++++ skills/pyem-model-generator/template.json | 195 +++------------ 10 files changed, 315 insertions(+), 470 deletions(-) create mode 100644 skills/pyem-model-generator/references/template.json diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index 8533949..83c12b8 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -1,53 +1,42 @@ # pyem-model-generator skill -Use this skill to scaffold new computational cognitive models for pyEM that are not in base `pyem`, including from free-text task/model descriptions with equations. +Use this skill to scaffold new computational cognitive model modules and example notebooks from either structured JSON or free-text equations. -## What it generates +## Primary template -- `pyem/models/{model_class}.py` - - `{model_name}_sim(params, nblocks, ntrials, **kwargs)` - - `{model_name}_fit(params, *, prior=None, output="npl", **kwargs)` -- `examples/{model_class}.ipynb` - - model/task description - - simulation and fit demo - - parameter recovery plot (like `examples/rl.ipynb`) +- `references/template.json` (canonical template) +- `template.json` (copy of canonical template for convenience) -## Included templates +## Reference anchors -- `template.json` (simple main template) -- `references/rl.json`, `references/bayes.json`, `references/glm.json` (reference anchors) -- `references/example-notebook-template.json` (parameter recovery notebook template) +- `references/rl.json` +- `references/bayes.json` +- `references/glm.json` -## How to use +## Model-class utility layout -1. Copy and fill `template.json`. -2. 
Provide it to the skill in your prompt. -3. Answer follow-up questions if anything is missing. -4. Ask the skill to generate the model module and notebook. +Generated model classes should include: +- `pyem/models/{model_class}_utils.py` +- one or more `pyem/models/{model_name}.py` -## Offline resources - -If the runtime does not include full `pyem` source files, use: - -- `references/pyem-runtime-contract.md` -- `references/parameter-recovery-notebook.md` - -These provide enough contract detail to generate pyEM-compatible sim/fit functions and notebook recovery plots. - - -## Free-text description support +Each model file should follow this shared import contract: -If you provide prose + equations instead of a filled template, the skill will parse text into `description_input.extracted_spec`. -Use `description_examples.social_signals` in `template.json` as a worked example of this conversion. +```python +from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +``` +Each generated `{model_name}.py` should define: -## Model class utility layout +- attributes: `mod_desc`, `mod_spec`, `mod_id`, `MODEL` +- functions: `mod_params`, `mod_sim`, `mod_fit` -Generated model classes should include a shared utility module `pyem/models/{model_class}_utils.py` and one or more model files `pyem/models/{model_name}.py` that import shared helpers: +## Offline resources -```python -from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params -``` +This skill is self-contained and does not require repository model files: -Use `references/modelclass-utils-template.py` and `references/model-file-template.py` as starting points. 
+- `references/modelclass-utils-template.py` +- `references/model-file-template.py` +- `references/example-notebook-template.json` +- `references/parameter-recovery-notebook.md` +- `references/pyem-runtime-contract.md` diff --git a/skills/pyem-model-generator/SKILL.md b/skills/pyem-model-generator/SKILL.md index c8c1c67..d8f8199 100644 --- a/skills/pyem-model-generator/SKILL.md +++ b/skills/pyem-model-generator/SKILL.md @@ -1,199 +1,108 @@ --- name: pyem-model-generator -description: Generate new computational cognitive model modules for pyEM and matching example notebooks. Use when asked to add a model not included in base pyEM, scaffold `pyem.models.{modelclass}.py` with `{modelname}_sim(params, nblocks, ntrials, **kwargs)` and `{modelname}_fit(params, *, prior=None, output="npl")`, and produce `examples/{modelclass}.ipynb`. Trigger this skill when the user wants pyEM-style imports, parameter transformations (e.g., `norm2alpha`, `norm2beta`), output dictionaries, model variants, or RL/Bayes/GLM-aligned structure. +description: Generate new computational cognitive model modules and example notebooks for pyEM-style workflows, including from free-text task/model descriptions. Use this skill to scaffold model-class shared utilities (`modclass_utils.py`), per-model files with `mod_desc/mod_spec/mod_id/MODEL`, and parameter-recovery notebooks when adding models not present in base packages. --- # pyem-model-generator -Generate pyEM-compatible model code using patterns in: +Generate standalone model code and notebooks from bundled references only. 
-- `pyem/models/rl.py` -- `pyem/models/bayes.py` -- `pyem/models/glm.py` +## Required resources (always local to this skill) -## Offline/resource mode +- `references/template.json` +- `references/rl.json` +- `references/bayes.json` +- `references/glm.json` +- `references/modelclass-utils-template.py` +- `references/model-file-template.py` +- `references/example-notebook-template.json` +- `references/parameter-recovery-notebook.md` +- `references/pyem-runtime-contract.md` -When full `pyem` package files are unavailable, load: +Do not assume that repository model files or an installed pyem package are available. -- `references/pyem-runtime-contract.md` for utility and fit contracts. -- `references/parameter-recovery-notebook.md` for notebook structure and plotting requirements. -- `references/example-notebook-template.json` for a ready-to-fill notebook cell template. +## Clarification behavior -In offline mode, follow these references instead of guessing utility behavior. +If required information is missing, ask concise follow-up questions before generation. +Required confirmations: -## Mandatory clarification behavior +1. `model_class`, `model_name`, and output paths. +2. Task structure (`nsubjects`, `nblocks`, `ntrials`, choice count). +3. Parameter names, transforms, bounds, and priors. +4. Update equations and choice rule. +5. Variant definitions and naming. +6. Notebook requirements (recovery metrics and plots). -If any required information is missing or ambiguous, ask the user concise follow-up questions before generating code. +## Free-text parsing workflow -Required items to confirm: +When the user gives prose/equations instead of structured JSON: -1. `model_class`, `model_name`, and target module path. -2. Simulation task inputs (at minimum `nblocks` and `ntrials`; plus task-specific arrays). -3. Parameter list with transform/bounds and semantic role. -4. Sim output keys and fit output modes. -5. Variant definitions (if requested). 
- - -## Converting free-text model descriptions into code - -When the user provides prose/equations instead of a filled template: - -1. Copy the text into `template.description_input.raw_text`. -2. Extract structured fields into `template.description_input.extracted_spec`: - - task flow (stimulus, choice set, outcomes, feedback), - - tensor shapes (subject/block/trial/option), - - latent state names (`Q_self`, `Q_other`, etc.), +1. Place original text in `description_input.raw_text`. +2. Parse into `description_input.extracted_spec`: + - task flow and outcomes, + - tensor shapes, - update equations, - - choice policy equation(s), - - variant catalog and parameter toggles. -3. Normalize equation variables into valid Python names and map them to parameter definitions. -4. If any mapping is ambiguous (for example sign conventions or variant naming), ask targeted follow-up questions before code generation. -5. Generate sim/fit using the extracted spec and preserve the user's intended equations exactly. + - choice rule(s), + - variant rules. +3. Normalize symbols into valid Python names. +4. Ask targeted questions for ambiguities (sign conventions, variant toggles, data keys). +5. Preserve equation intent when generating `mod_sim` and `mod_fit`. 
-### Example: social signals description +## Model-class utility heuristic (required) -For text like a “social signals task” with three options (A/B/C), dual value tracks (`Q_self`, `Q_other`), and policy variants, produce: +Generate shared utility module first, then model files: -- arrays shaped `(nsubjects, nblocks, ntrials, 3)` for option-level values/probabilities, -- update rules for `Q_self` and `Q_other` with separate learning-rate/valence parameters where requested, -- base policy `softmax(beta * (w_self * Q_self + w_other * Q_other))`, -- arbitration variants `p = (1-omega) * softmax(beta * Q_self) + omega * softmax(beta * Q_other)`, -- variant names tracked in template `variants.variant_names` and parsed parameter switches in `description_input.extracted_spec.variant_rules`. +- Shared module: `pyem/models/{model_class}_utils.py` +- Model module(s): `pyem/models/{model_name}.py` -## pyEM import and function format (pseudo code) +Each model file must import shared helpers using this contract: ```python -import numpy as np -from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval - - -def {model_name}_sim( - params: np.ndarray, - nblocks: int = 4, - ntrials: int = 12, - **kwargs, -) -> dict: - """Simulate behavior for one model family.""" - n_subjects = params.shape[0] - rng = np.random.default_rng(kwargs.get("seed", None)) - - beta = params[:, 0] # natural-space for simulation - alpha = params[:, 1] # natural-space for simulation - - choices = np.empty((n_subjects, nblocks, ntrials), dtype=object) - rewards = np.zeros((n_subjects, nblocks, ntrials), dtype=float) - ev = np.zeros((n_subjects, nblocks, ntrials + 1, 2), dtype=float) - pe = np.zeros((n_subjects, nblocks, ntrials), dtype=float) - nll = np.zeros((n_subjects, nblocks, ntrials), dtype=float) - - for s in range(n_subjects): - for b in range(nblocks): - ev[s, b, 0, :] = 0.5 - for t in range(ntrials): - p = softmax(ev[s, b, t, :], beta[s]) - c = rng.choice([0, 1], p=p) - r = 0.0 # replace 
with task-specific outcome logic - pe[s, b, t] = r - ev[s, b, t, c] - ev[s, b, t + 1, :] = ev[s, b, t, :] - ev[s, b, t + 1, c] = ev[s, b, t, c] + alpha[s] * pe[s, b, t] - nll[s, b, t] = -np.log(p[c] + 1e-12) - - return { - "params": params, - "choices": choices, - "rewards": rewards, - "EV": ev, - "PE": pe, - "nll": nll, - } - - -def {model_name}_fit( - params, - *, - prior=None, - output: str = "npl", - **kwargs, -): - """Compute fit objective compatible with pyEM.""" - beta = float(norm2beta(params[0])) - alpha = float(norm2alpha(params[1])) - - if not (1e-5 <= beta <= 20.0): - return 1e7 - if not (0.0 <= alpha <= 1.0): - return 1e7 - - choices = kwargs["choices"] - rewards = kwargs["rewards"] - nblocks, ntrials = rewards.shape - - ev = np.zeros((nblocks, ntrials + 1, 2), dtype=float) - pe = np.zeros((nblocks, ntrials), dtype=float) - nll = 0.0 - - for b in range(nblocks): - ev[b, 0, :] = 0.5 - for t in range(ntrials): - c = 0 if choices[b, t] == "A" else 1 - p = softmax(ev[b, t, :], beta) - r = rewards[b, t] - pe[b, t] = r - ev[b, t, c] - ev[b, t + 1, :] = ev[b, t, :] - ev[b, t + 1, c] = ev[b, t, c] + alpha * pe[b, t] - nll += -np.log(p[c] + 1e-12) - - if output == "all": - return {"params": np.array([beta, alpha]), "EV": ev, "PE": pe, "nll": nll} - - return calc_fval(nll, params, prior=prior, output=output) +from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params ``` +Shared helper expectations: -## Model-class utility heuristic +- `_alloc_sim` / `_alloc_fit`: tensor allocation. +- `ModelSpec`: model metadata registration. +- `spec_to_id`: deterministic model ID from `mod_spec`. +- `build_params`: parameter initialization and transforms. -Prefer a per-class shared utility file so models in the same class reuse scaffolding: +## Per-model file contract -- Generate `pyem/models/{model_class}_utils.py` using `references/modelclass-utils-template.py`. 
-- Generate each model variant file as `pyem/models/{model_name}.py` using `references/model-file-template.py`. -- In each model file, import shared helpers with: +Each generated `{model_name}.py` should include: -```python -from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params -``` +- attributes: `mod_desc`, `mod_spec`, `mod_id`, `MODEL` +- functions: `mod_params`, `mod_sim`, `mod_fit` -Expected sharing points across model files: +Use `references/model-file-template.py` as the base pattern. -- `_alloc_sim` / `_alloc_fit` for tensor allocation. -- `ModelSpec` for registry metadata. -- `spec_to_id` for deterministic model IDs. -- `build_params` and parameter registry for consistent initialization/transforms. +## Notebook generation contract -When users provide custom helper code, preserve the same API names so all models in a class remain interoperable. +Generate `examples/{model_class}.ipynb` from `references/example-notebook-template.json`. -## Generation workflow +Required notebook flow: -1. Load `template.json` and, if package context is missing, load relevant files under `references/`. -2. If the user supplied prose/equations, parse them into `description_input.extracted_spec`; if fields remain missing, ask follow-up questions and wait for answers. -3. Generate `pyem/models/{model_class}_utils.py` first, then generate one or more `pyem/models/{model_name}.py` files that import shared helpers from that utils module. -4. Generate `examples/{model_class}.ipynb` from `references/example-notebook-template.json` and align section order to `examples/rl.ipynb`, `examples/bayes.ipynb`, and `examples/glm.ipynb` conventions: - - model/task description, - - simulation demo, - - fit simulated behavior via `EMModel.recover`, - - parameter recovery plot with identity line and correlation per parameter. -5. Run smoke checks: import module, run sim, run fit with `output="npl"`. +1. model/task overview markdown +2. parameter setup +3. 
simulation run +4. fit/recovery run +5. parameter recovery plots (identity line + correlation) -## Optional reference alignment +See `references/parameter-recovery-notebook.md` for section and plotting details. -If needed, consult `references/rl.json`, `references/bayes.json`, and `references/glm.json` to mirror existing style and output contracts. +## Generation steps -## Notebook generation without repository access +1. Load `references/template.json`. +2. Merge user inputs (or parsed free-text spec) into template fields. +3. Generate the shared utils module `pyem/models/{model_class}_utils.py` from `references/modelclass-utils-template.py`. +4. Generate `{model_name}.py` from `references/model-file-template.py` with required attributes/functions. +5. Generate the notebook `examples/{model_class}.ipynb` from `references/example-notebook-template.json`. +6. Run smoke checks on generated code/notebook cells when execution is requested. -When no local `examples/` notebooks are accessible: +## Smoke checks -1. Load `references/parameter-recovery-notebook.md`. -2. Load `references/example-notebook-template.json`. -3. Fill placeholders and write a valid `.ipynb` (nbformat 4). -4. Ensure imports include `EMModel` and the generated sim/fit functions. -5. Ensure final cells run simulation, fitting, and recovery plotting end-to-end. +- Import generated utils and model modules. +- Run `mod_params`, `mod_sim`, and `mod_fit(output="npl")` on minimal synthetic data. +- Verify notebook cells execute through recovery plotting. 
diff --git a/skills/pyem-model-generator/references/bayes.json b/skills/pyem-model-generator/references/bayes.json index 10c7593..acd32f9 100644 --- a/skills/pyem-model-generator/references/bayes.json +++ b/skills/pyem-model-generator/references/bayes.json @@ -1,35 +1,30 @@ { - "source": "pyem/models/bayes.py", - "module": "pyem.models.bayes", - "imports": [ - "import numpy as np", - "from ..utils.math import norm2alpha, calc_fval" - ], - "helpers": [ + "model_class": "bayes", + "models": [ { - "name": "_generate_fishp", - "signature": "_generate_fishp(lambda1: float, n_fish: int) -> np.ndarray" + "model_name": "bayes_fish", + "model_file": "pyem/models/bayes_fish.py", + "model_file_contract": { + "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" + }, + "mod_spec": { + "rl": { + "softmax": ["beta"] + }, + "link": { + "bayes": ["lambda1"] + } + }, + "parameters": ["beta", "lambda1"], + "sim_outputs": ["params", "choices", "observations", "posterior", "nll"], + "fit_outputs": { + "scalar": ["npl", "nll"], + "all": ["params", "posterior", "nll"] + } } ], - "functions": [ - { - "name": "bayes_sim", - "kind": "sim", - "signature": "bayes_sim(params: np.ndarray, nblocks: int = 10, ntrials: int = 15, n_fish: int = 3) -> dict", - "param_space": "natural", - "state_keys": ["choices", "observations", "probabilities", "ponds"], - "output_keys": ["params", "choices", "observations", "probabilities", "ponds"] - }, - { - "name": "bayes_fit", - "kind": "fit", - "signature": "bayes_fit(params, choices, observations, prior=None, output: str = 'npl')", - "transform_map": [ - {"index": 0, "name": "lambda1", "transform": "norm2alpha", "bounds": [0.0, 1.0]} - ], - "output_modes": ["npl", "nll", "all"], - "all_output_keys": ["params", "nll"], - "objective_call": "calc_fval(nll, params, prior=prior, output=output)" - } 
- ] + "utils_file": "pyem/models/bayes_utils.py", + "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] } diff --git a/skills/pyem-model-generator/references/glm.json b/skills/pyem-model-generator/references/glm.json index 8e4113e..5154a56 100644 --- a/skills/pyem-model-generator/references/glm.json +++ b/skills/pyem-model-generator/references/glm.json @@ -1,40 +1,27 @@ { - "source": "pyem/models/glm.py", - "module": "pyem.models.glm", - "imports": [ - "import numpy as np", - "from scipy.stats import norm", - "from scipy.special import expit", - "from ..utils.math import norm2alpha, calc_fval" - ], - "functions": [ - { - "name": "glm_sim", - "kind": "sim", - "signature": "glm_sim(params: np.ndarray, ntrials: int = 100)", - "param_space": "natural", - "output_shape": "tuple", - "output_keys": ["X", "Y"] - }, + "model_class": "glm", + "models": [ { - "name": "glm_fit", - "kind": "fit", - "signature": "glm_fit(params, X, Y, prior=None, output: str = 'npl')", - "transform_map": [], - "output_modes": ["npl", "nll", "all"], - "all_output_keys": ["params", "predicted_y", "negll", "BIC"], - "objective_call": "calc_fval(negll, params, prior=prior, output=output)" - }, - { - "name": "glm_decay_fit", - "kind": "fit", - "signature": "glm_decay_fit(params, X, Y, prior=None, output: str = 'npl', decay: str = 'twostep')", - "transform_map": [ - {"index": -1, "name": "gamma", "transform": "norm2alpha", "bounds": [0.0, 1.0]} - ], - "output_modes": ["npl", "nll", "all"], - "all_output_keys": ["params", "predicted_y", "nll", "BIC"], - "objective_call": "calc_fval(negll, params, prior=prior, output=output)" + "model_name": "glm_linear", + "model_file": "pyem/models/glm_linear.py", + "model_file_contract": { + "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" + }, + 
"mod_spec": { + "cr": { + "linear": ["w0", "w1", "sigma"] + } + }, + "parameters": ["w0", "w1", "sigma"], + "sim_outputs": ["params", "X", "y", "pred", "nll"], + "fit_outputs": { + "scalar": ["npl", "nll"], + "all": ["params", "pred", "nll", "bic"] + } } - ] + ], + "utils_file": "pyem/models/glm_utils.py", + "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] } diff --git a/skills/pyem-model-generator/references/model-file-template.py b/skills/pyem-model-generator/references/model-file-template.py index 92cec37..324a7d1 100644 --- a/skills/pyem-model-generator/references/model-file-template.py +++ b/skills/pyem-model-generator/references/model-file-template.py @@ -4,12 +4,15 @@ import numpy as np -from pyem.utils.math import calc_fval, norm2alpha, norm2beta, softmax -from .model_class_utils import ( +from .modclass_utils import ( ModelSpec, _alloc_fit, _alloc_sim, build_params, + calc_fval, + norm2alpha, + norm2beta, + softmax, spec_to_id, ) @@ -78,7 +81,7 @@ def mod_fit(params, choices, rewards, prior=None, output="npl"): if output == "all": return {"params": [beta, alpha], **dat} - return calc_fval(dat["nll"], params, prior=prior, output=output) + return calc_fval(dat["nll"], np.asarray(params), prior=prior, output=output) MODEL = ModelSpec( diff --git a/skills/pyem-model-generator/references/modelclass-utils-template.py b/skills/pyem-model-generator/references/modelclass-utils-template.py index a5a1b00..f005f4a 100644 --- a/skills/pyem-model-generator/references/modelclass-utils-template.py +++ b/skills/pyem-model-generator/references/modelclass-utils-template.py @@ -1,6 +1,7 @@ -"""Shared utilities for models in one model class. +"""Shared utilities for a model class. -Copy this template to `pyem/models/{model_class}_utils.py` and customize. +Copy this template to `pyem/models/{modclass}_utils.py` (or equivalent output +path) and customize parameter registries/allocation fields as needed. 
""" from __future__ import annotations @@ -9,12 +10,11 @@ from typing import Callable, Dict, Sequence import numpy as np -from pyem.utils.math import norm2alpha, norm2beta @dataclass(frozen=True) class ModelSpec: - """Container for a generated model implementation.""" + """Container for one generated model variant.""" id: str spec: dict @@ -24,6 +24,45 @@ class ModelSpec: fit: Callable +@dataclass(frozen=True) +class ParamDef: + """Definition for one parameter in the registry.""" + + name: str + xform: Callable + init_fn: Callable + + +def norm2alpha(x: float | np.ndarray) -> float | np.ndarray: + """Map unconstrained real values to (0, 1).""" + return 1.0 / (1.0 + np.exp(-x)) + + +def norm2beta(x: float | np.ndarray) -> float | np.ndarray: + """Map unconstrained real values to (1e-5, 20].""" + return 1e-5 + (20.0 - 1e-5) / (1.0 + np.exp(-x)) + + +def softmax(values: np.ndarray, beta: float) -> np.ndarray: + """Compute numerically stable softmax(beta * values).""" + z = beta * (values - np.max(values)) + exp_z = np.exp(z) + return exp_z / np.sum(exp_z) + + +def calc_fval(nll: float, params: np.ndarray, prior=None, output: str = "npl") -> float: + """Return objective value expected by generated fit functions.""" + if output == "nll" or prior is None: + return float(nll) + if output == "npl": + # lightweight Gaussian prior support + mu = np.asarray(prior.get("mu", np.zeros_like(params)), dtype=float) + sigma = np.asarray(prior.get("sigma", np.ones_like(params)), dtype=float) + log_prior = -0.5 * np.sum(((params - mu) / sigma) ** 2) + return float(nll - log_prior) + raise ValueError("output must be 'npl' or 'nll'") + + def spec_to_id(spec: dict) -> str: """Convert a spec dictionary into a deterministic model ID string.""" block_order = ["rl", "cr", "link"] @@ -77,15 +116,6 @@ def _alloc_fit(nblocks: int, ntrials: int, nchoices: int = 2) -> Dict[str, np.nd } -@dataclass(frozen=True) -class ParamDef: - """Definition for one model parameter in the registry.""" - - name: 
str - xform: Callable - init_fn: Callable - - PARAM_REGISTRY = { "beta": ParamDef("beta", norm2beta, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), "alpha": ParamDef("alpha", norm2alpha, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), diff --git a/skills/pyem-model-generator/references/parameter-recovery-notebook.md b/skills/pyem-model-generator/references/parameter-recovery-notebook.md index 753b77a..c43e3cc 100644 --- a/skills/pyem-model-generator/references/parameter-recovery-notebook.md +++ b/skills/pyem-model-generator/references/parameter-recovery-notebook.md @@ -2,7 +2,7 @@ Use this reference to implement `examples/{model_class}.ipynb` even when base example notebooks are unavailable. -This pattern is distilled from the repository notebooks under `examples/` (`rl.ipynb`, `bayes.ipynb`, `glm.ipynb`): +This pattern follows the bundled offline templates and anchor specs (`references/rl.json`, `references/bayes.json`, `references/glm.json`) so notebook generation does not require repository access: - Intro markdown title + task subtitle. - Import block (`numpy`, plotting, model sim/fit, `EMModel`). 
diff --git a/skills/pyem-model-generator/references/rl.json b/skills/pyem-model-generator/references/rl.json index 94decd5..95e7b49 100644 --- a/skills/pyem-model-generator/references/rl.json +++ b/skills/pyem-model-generator/references/rl.json @@ -1,30 +1,28 @@ { - "source": "pyem/models/rl.py", - "module": "pyem.models.rl", - "imports": [ - "import numpy as np", - "from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval" - ], - "functions": [ - { - "name": "rw1a1b_sim", - "kind": "sim", - "signature": "rw1a1b_sim(params: np.ndarray, nblocks: int = 3, ntrials: int = 24, outcomes: np.ndarray | None = None)", - "param_space": "natural", - "state_keys": ["choices", "rewards", "EV", "ch_prob", "choices_A", "PE", "nll"], - "output_keys": ["params", "choices", "rewards", "EV", "ch_prob", "choices_A", "PE", "nll"] - }, + "model_class": "rl", + "models": [ { - "name": "rw1a1b_fit", - "kind": "fit", - "signature": "rw1a1b_fit(params, choices, rewards, prior=None, output=\"npl\")", - "transform_map": [ - {"index": 0, "name": "beta", "transform": "norm2beta", "bounds": [1e-05, 20.0]}, - {"index": 1, "name": "alpha", "transform": "norm2alpha", "bounds": [0.0, 1.0]} - ], - "output_modes": ["npl", "nll", "all"], - "all_output_keys": ["params", "choices", "choices_A", "rewards", "EV", "PE", "nll"], - "objective_call": "calc_fval(nll, params, prior=prior, output=output)" + "model_name": "rw1a1b", + "model_file": "pyem/models/rw1a1b.py", + "model_file_contract": { + "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" + }, + "mod_spec": { + "rl": { + "softmax": ["beta"], + "rw": ["alpha"] + } + }, + "parameters": ["beta", "alpha"], + "sim_outputs": ["params", "choices", "ev", "ch_prob", "pe", "nll"], + "fit_outputs": { + "scalar": ["npl", "nll"], + "all": ["params", "ev", "ch_prob", "pe", "nll"] + } } - ] 
+ ], + "utils_file": "pyem/models/rl_utils.py", + "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] } diff --git a/skills/pyem-model-generator/references/template.json b/skills/pyem-model-generator/references/template.json new file mode 100644 index 0000000..7a3110a --- /dev/null +++ b/skills/pyem-model-generator/references/template.json @@ -0,0 +1,67 @@ +{ + "version": "2.0", + "model_class": "", + "model_name": "", + "paths": { + "utils_file": "pyem/models/{model_class}_utils.py", + "model_file": "pyem/models/{model_name}.py", + "notebook_file": "examples/{model_class}.ipynb" + }, + "model_file_contract": { + "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "required_functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" + }, + "task": { + "nsubjects": 100, + "nblocks": 4, + "ntrials": 12, + "nchoices": 2, + "choice_labels": ["A", "B"], + "outcome_keys": ["rewards"] + }, + "parameters": [ + { + "name": "beta", + "index": 0, + "transform": "norm2beta", + "bounds": [1e-05, 20.0] + }, + { + "name": "alpha", + "index": 1, + "transform": "norm2alpha", + "bounds": [0.0, 1.0] + } + ], + "equations": { + "state_updates": [], + "choice_rule": "softmax(beta * values)" + }, + "variants": { + "requested": false, + "variant_names": [], + "variant_rules": [] + }, + "description_input": { + "raw_text": "", + "extracted_spec": { + "task_flow": [], + "state_tensors": [], + "update_equations": [], + "choice_rule": "", + "variant_rules": [] + } + }, + "notebook": { + "template_file": "references/example-notebook-template.json", + "include_parameter_recovery_plot": true, + "nsubjects": 50, + "random_seed": 123 + }, + "reference_specs": [ + "references/rl.json", + "references/bayes.json", + "references/glm.json" + ] +} diff --git a/skills/pyem-model-generator/template.json 
b/skills/pyem-model-generator/template.json index 3b60c7a..7a3110a 100644 --- a/skills/pyem-model-generator/template.json +++ b/skills/pyem-model-generator/template.json @@ -1,196 +1,63 @@ { + "version": "2.0", "model_class": "", "model_name": "", - "module_path": "pyem/models/{model_class}.py", - "imports": [ - "import numpy as np", - "from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval" - ], + "paths": { + "utils_file": "pyem/models/{model_class}_utils.py", + "model_file": "pyem/models/{model_name}.py", + "notebook_file": "examples/{model_class}.ipynb" + }, + "model_file_contract": { + "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "required_functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" + }, + "task": { + "nsubjects": 100, + "nblocks": 4, + "ntrials": 12, + "nchoices": 2, + "choice_labels": ["A", "B"], + "outcome_keys": ["rewards"] + }, "parameters": [ { "name": "beta", "index": 0, - "description": "Inverse temperature", - "sim_space": "natural", - "fit_space": "normalized", "transform": "norm2beta", - "bounds": [ - 1e-05, - 20.0 - ] + "bounds": [1e-05, 20.0] }, { "name": "alpha", "index": 1, - "description": "Learning rate", - "sim_space": "natural", - "fit_space": "normalized", "transform": "norm2alpha", - "bounds": [ - 0.0, - 1.0 - ] + "bounds": [0.0, 1.0] } ], - "sim": { - "signature": "{model_name}_sim(params, nblocks, ntrials, **kwargs)", - "inputs": { - "nblocks": 4, - "ntrials": 12, - "custom": [ - { - "name": "seed", - "type": "int | None", - "required": false, - "description": "Seed for np.random.default_rng." - }, - { - "name": "outcomes", - "type": "np.ndarray | None", - "required": false, - "description": "Optional externally supplied outcomes." 
- } - ] - }, - "state_arrays": [ - "choices", - "rewards", - "EV", - "PE", - "nll" - ], - "output_keys": [ - "params", - "choices", - "rewards", - "EV", - "PE", - "nll" - ] - }, - "fit": { - "signature": "{model_name}_fit(params, *, prior=None, output=\"npl\", **kwargs)", - "required_inputs": [ - "choices", - "rewards" - ], - "output_modes": [ - "npl", - "nll", - "all" - ], - "all_output_keys": [ - "params", - "nll" - ], - "failure_return": 10000000.0, - "objective": "calc_fval(nll, params, prior=prior, output=output)" + "equations": { + "state_updates": [], + "choice_rule": "softmax(beta * values)" }, "variants": { "requested": false, "variant_names": [], - "differences": "" - }, - "notebook": { - "path": "examples/{model_class}.ipynb", - "include_parameter_recovery_plot": true, - "sections": [ - "Model and task overview", - "Parameter specification", - "Simulation example", - "Fit simulated behavior", - "Parameter recovery plot", - "Result summary" - ], - "random_seed": 123, - "template_file": "references/example-notebook-template.json", - "pattern_source": "examples/rl.ipynb, examples/bayes.ipynb, examples/glm.ipynb", - "nsubjects": 50, - "param_recovery": { - "use_emmodel_recover": true, - "plot_identity_line": true, - "plot_corr_in_title": true - } + "variant_rules": [] }, "description_input": { "raw_text": "", "extracted_spec": { - "task_name": "", "task_flow": [], - "choice_set": [ - "A", - "B" - ], - "nchoices": 2, - "state_tensors": [ - { - "name": "Q_self", - "shape": "(nsubjects, nblocks, ntrials+1, nchoices)" - }, - { - "name": "Q_other", - "shape": "(nsubjects, nblocks, ntrials+1, nchoices)" - } - ], + "state_tensors": [], "update_equations": [], "choice_rule": "", "variant_rules": [] } }, - "description_examples": { - "social_signals": { - "task_name": "social signals task", - "nsubjects": 100, - "sim_inputs": { - "nblocks": 4, - "ntrials": 12, - "nchoices": 3 - }, - "task_flow": [ - "show three options A/B/C", - "store choice in (subject, block, 
trial) array", - "observe clear signal outcome_self in {0,1}", - "observe partner social feedback mapped to outcome_other" - ], - "state_tensors": [ - { - "name": "Q_self", - "shape": "(nsubjects, nblocks, ntrials+1, 3)" - }, - { - "name": "Q_other", - "shape": "(nsubjects, nblocks, ntrials+1, 3)" - } - ], - "update_equations": [ - "Q_self[s,b,t+1,c] = Q_self[s,b,t,c] + alpha_self * (outcome_self[s,b,t] - Q_self[s,b,t,c])", - "Q_other[s,b,t+1,c] = Q_other[s,b,t,c] + alpha_other * (outcome_other[s,b,t] - Q_other[s,b,t,c])" - ], - "choice_rule": "p(choice) = softmax(beta * (w_self*Q_self[s,b,t,c] + w_other*Q_other[s,b,t,c]))", - "outcome_other_definition": "social_sensitivity_pos*1 if positive else social_sensitivity_neg*-1", - "variant_rules": [ - "1b2w2a: alpha_self, alpha_other with valence-sensitive theta terms", - "1b2w2a2t: alpha_self_pos, alpha_self_neg, alpha_other_pos, alpha_other_neg", - "1b2w2a4t: separate self and other sensitivity for pos/neg outcomes", - "1b1o2a4t/1b1o1a4t/1b1o2a2t/1b1o2a: arbitration p=(1-omega)*softmax(beta*Q_self)+omega*softmax(beta*Q_other)" - ] - } - }, - "scaffold_layout": { - "model_utils_path": "pyem/models/{model_class}_utils.py", - "model_files": [ - "pyem/models/{model_name}.py" - ], - "shared_import": "from .{model_class}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", - "required_shared_symbols": [ - "_alloc_sim", - "_alloc_fit", - "ModelSpec", - "spec_to_id", - "build_params" - ], - "utils_template": "references/modelclass-utils-template.py", - "model_template": "references/model-file-template.py" + "notebook": { + "template_file": "references/example-notebook-template.json", + "include_parameter_recovery_plot": true, + "nsubjects": 50, + "random_seed": 123 }, "reference_specs": [ "references/rl.json", From c833faa038bf229801afe255e8deaef2edb349f2 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 19:08:46 -0400 Subject: [PATCH 
03/14] Add skill review summary and example generation prompts --- skills/pyem-model-generator/README.md | 66 +++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index 83c12b8..e24f231 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -2,6 +2,16 @@ Use this skill to scaffold new computational cognitive model modules and example notebooks from either structured JSON or free-text equations. +## Review summary + +This skill is currently in good shape for offline generation because it has: + +- A canonical, local template (`references/template.json`) and a convenience copy (`template.json`). +- Local reference anchors for RL/Bayes/GLM (`references/rl.json`, `references/bayes.json`, `references/glm.json`). +- Self-contained model scaffolding templates (`references/modelclass-utils-template.py`, `references/model-file-template.py`) with shared helper contracts. +- Notebook generation templates and parameter-recovery guidance (`references/example-notebook-template.json`, `references/parameter-recovery-notebook.md`). +- Runtime math/objective contracts in `references/pyem-runtime-contract.md`. + ## Primary template - `references/template.json` (canonical template) @@ -40,3 +50,59 @@ This skill is self-contained and does not require repository model files: - `references/example-notebook-template.json` - `references/parameter-recovery-notebook.md` - `references/pyem-runtime-contract.md` + +## Example prompts + +Use prompts like the following with this skill. + +### 1) Reversal learning (Kalman filter RL) + +```text +Use pyem-model-generator. +Task: Reversal learning RL with two options A (80% reward) and B (20% reward), 2 blocks, 40 trials per block, reversals every 10 trials. +Per trial: choose A or B, then observe reward (+1) or no reward (0). 
+Generate a Kalman filter model to simulate behavior and fit the same model to simulated behavior. +Please output: +1) pyem/models/{model_class}_utils.py +2) pyem/models/{model_name}.py with mod_desc/mod_spec/mod_id/MODEL and mod_params/mod_sim/mod_fit +3) examples/{model_class}.ipynb with parameter recovery plots. +If anything is ambiguous, ask follow-up questions first. +``` + +### 2) Two-step task (model-free, model-based, hybrid) + +```text +Use pyem-model-generator. +Task: Two-step task with two stages per trial. +Stage 1: choose between two first-stage options; common transition p=0.70 and rare transition p=0.30 to one of two second-stage states (fixed mapping). +Stage 2: choose between two options in reached state; reward/no reward outcome. +Second-stage reward probabilities (4 arms) drift by independent bounded Gaussian random walks in [0.25, 0.75]. +Generate three variants: +- Model-free SARSA(lambda) with parameters alpha_1, alpha_2, lambda, beta_1, beta_2, p +- Model-based with Bellman-style prospective first-stage valuation +- Hybrid with Q_net = w*Q_MB + (1-w)*Q_TD and parameters beta_1, beta_2, alpha_1, alpha_2, lambda, p, w +Please generate files using the model-class utility layout and include a recovery notebook that compares recovered parameters for all variants. +Ask clarifying questions if needed. +``` + +### 3) Social signals task with variants + +```text +Use pyem-model-generator. +The task is called the social signals task. On each trial, participants see three options (A,B,C) and choose one option (stored in subject, block, trial arrays), then observe clear signal (+1) or not (0), then receive social feedback (thumbs up/down). +Use 100 agents, 4 blocks, 12 trials per block. 
+Model equations: +Q_self[s,b,t+1,c] = Q_self[s,b,t,c] + alpha_self * (outcome_self[s,b,t] - Q_self[s,b,t,c]) +Q_other[s,b,t+1,c] = Q_other[s,b,t,c] + alpha_other * (outcome_other[s,b,t] - Q_other[s,b,t,c]) +outcome_self in {0,1} +outcome_other = social_sensitivity_pos*1 (if positive) or social_sensitivity_neg*-1 (if negative), label this theta +p(choice) = softmax(beta * (w_self*Q_self[s,b,t,c] + w_other*Q_other[s,b,t,c])) +Variants include: +- 1b2w2a +- 1b2w2a2t +- 1b2w2a4t +- arbitration variants: 1b1o2a4t, 1b1o1a4t, 1b1o2a2t, 1b1o2a using + p(choice) = (1-omega)*softmax(beta * Q_self[s,b,t,c]) + omega*softmax(beta * Q_other[s,b,t,c]) +Generate utility + model files + notebook with parameter recovery for each variant. +Ask follow-up questions for any ambiguous naming/sign conventions. +``` From 38229499610f15b586f782b2c15f3abbf37fd612 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 19:09:00 -0400 Subject: [PATCH 04/14] Standardize README example prompts to a consistent pattern --- skills/pyem-model-generator/README.md | 82 ++++++++++++++++++--------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index e24f231..194e5b4 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -53,56 +53,84 @@ This skill is self-contained and does not require repository model files: ## Example prompts -Use prompts like the following with this skill. +Use the same prompt structure for all tasks: + +1. **Context** (task design and data-generating process) +2. **Model requirements** (equations, variants, parameter set) +3. **Output contract** (utils file, model file(s), notebook) +4. **Clarification instruction** (ask questions before generating if ambiguous) ### 1) Reversal learning (Kalman filter RL) ```text Use pyem-model-generator. 
-Task: Reversal learning RL with two options A (80% reward) and B (20% reward), 2 blocks, 40 trials per block, reversals every 10 trials. -Per trial: choose A or B, then observe reward (+1) or no reward (0). -Generate a Kalman filter model to simulate behavior and fit the same model to simulated behavior. -Please output: -1) pyem/models/{model_class}_utils.py -2) pyem/models/{model_name}.py with mod_desc/mod_spec/mod_id/MODEL and mod_params/mod_sim/mod_fit -3) examples/{model_class}.ipynb with parameter recovery plots. -If anything is ambiguous, ask follow-up questions first. + +Context: +Reversal learning RL task with two options A (80% reward) and B (20% reward), 2 blocks, 40 trials per block, reversals every 10 trials. Per trial: choose A/B, then observe reward (+1) or no reward (0). + +Model requirements: +Implement a Kalman filter RL model and fit the same model to simulated behavior. + +Output contract: +- pyem/models/{model_class}_utils.py +- pyem/models/{model_name}.py with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit +- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery plots + +If any details are ambiguous, ask follow-up questions before generating files. ``` ### 2) Two-step task (model-free, model-based, hybrid) ```text Use pyem-model-generator. -Task: Two-step task with two stages per trial. -Stage 1: choose between two first-stage options; common transition p=0.70 and rare transition p=0.30 to one of two second-stage states (fixed mapping). -Stage 2: choose between two options in reached state; reward/no reward outcome. -Second-stage reward probabilities (4 arms) drift by independent bounded Gaussian random walks in [0.25, 0.75]. + +Context: +Two-step task with two stages per trial. +- Stage 1: choose between two first-stage options. +- Transition structure: common transition p=0.70, rare transition p=0.30, fixed mapping to two second-stage states. 
+- Stage 2: choose between two options in reached state and observe reward/no reward. +- Four second-stage reward probabilities drift independently by bounded Gaussian random walks in [0.25, 0.75]. + +Model requirements: Generate three variants: -- Model-free SARSA(lambda) with parameters alpha_1, alpha_2, lambda, beta_1, beta_2, p -- Model-based with Bellman-style prospective first-stage valuation -- Hybrid with Q_net = w*Q_MB + (1-w)*Q_TD and parameters beta_1, beta_2, alpha_1, alpha_2, lambda, p, w -Please generate files using the model-class utility layout and include a recovery notebook that compares recovered parameters for all variants. -Ask clarifying questions if needed. +1) Model-free SARSA(lambda) with parameters alpha_1, alpha_2, lambda, beta_1, beta_2, p. +2) Model-based learner using known transition structure and prospective first-stage valuation. +3) Hybrid learner with Q_net = w*Q_MB + (1-w)*Q_TD and parameters beta_1, beta_2, alpha_1, alpha_2, lambda, p, w. + +Output contract: +- pyem/models/{model_class}_utils.py +- pyem/models/{model_name}.py (or one file per variant) with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit +- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery comparison across variants + +If any details are ambiguous, ask follow-up questions before generating files. ``` ### 3) Social signals task with variants ```text Use pyem-model-generator. -The task is called the social signals task. On each trial, participants see three options (A,B,C) and choose one option (stored in subject, block, trial arrays), then observe clear signal (+1) or not (0), then receive social feedback (thumbs up/down). -Use 100 agents, 4 blocks, 12 trials per block. -Model equations: + +Context: +Social signals task. On each trial, participants see options A/B/C, choose one option (store in subject-block-trial arrays), observe clear signal (+1) or not (0), then receive social feedback (thumbs up/down). 
Use 100 agents, 4 blocks, 12 trials per block. + +Model requirements: +Use equations: Q_self[s,b,t+1,c] = Q_self[s,b,t,c] + alpha_self * (outcome_self[s,b,t] - Q_self[s,b,t,c]) Q_other[s,b,t+1,c] = Q_other[s,b,t,c] + alpha_other * (outcome_other[s,b,t] - Q_other[s,b,t,c]) outcome_self in {0,1} -outcome_other = social_sensitivity_pos*1 (if positive) or social_sensitivity_neg*-1 (if negative), label this theta +outcome_other = social_sensitivity_pos*1 (if positive) or social_sensitivity_neg*-1 (if negative), labeled theta p(choice) = softmax(beta * (w_self*Q_self[s,b,t,c] + w_other*Q_other[s,b,t,c])) -Variants include: +Variants: - 1b2w2a - 1b2w2a2t - 1b2w2a4t -- arbitration variants: 1b1o2a4t, 1b1o1a4t, 1b1o2a2t, 1b1o2a using - p(choice) = (1-omega)*softmax(beta * Q_self[s,b,t,c]) + omega*softmax(beta * Q_other[s,b,t,c]) -Generate utility + model files + notebook with parameter recovery for each variant. -Ask follow-up questions for any ambiguous naming/sign conventions. +- arbitration variants 1b1o2a4t, 1b1o1a4t, 1b1o2a2t, 1b1o2a with + p(choice) = (1-omega)*softmax(beta*Q_self[s,b,t,c]) + omega*softmax(beta*Q_other[s,b,t,c]) + +Output contract: +- pyem/models/{model_class}_utils.py +- pyem/models/{model_name}.py (or one file per variant) with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit +- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery plots for each variant + +If any details are ambiguous, ask follow-up questions before generating files. 
``` From 5f59a0a293cd58d394a40b4403a49f68403eea2a Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 19:44:23 -0400 Subject: [PATCH 05/14] Simplify skill outputs and remove template duplication --- skills/pyem-model-generator/README.md | 134 +++--------------- skills/pyem-model-generator/SKILL.md | 108 +++++++------- .../references/bayes.json | 30 ++-- .../references/example-notebook-template.json | 8 +- .../pyem-model-generator/references/glm.json | 27 ++-- .../references/model-file-template.py | 7 +- .../references/modelclass-utils-template.py | 43 +----- .../references/parameter-recovery-notebook.md | 4 +- .../references/pyem-runtime-contract.md | 85 ++++------- .../pyem-model-generator/references/rl.json | 28 ++-- .../references/template.json | 67 --------- skills/pyem-model-generator/template.json | 67 --------- 12 files changed, 136 insertions(+), 472 deletions(-) delete mode 100644 skills/pyem-model-generator/references/template.json delete mode 100644 skills/pyem-model-generator/template.json diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index 194e5b4..1a1cc42 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -1,136 +1,42 @@ # pyem-model-generator skill -Use this skill to scaffold new computational cognitive model modules and example notebooks from either structured JSON or free-text equations. +Use this skill to scaffold standalone computational cognitive model files and recovery notebooks from reference specs or free-text equations. -## Review summary +## Plan to fix known flaws -This skill is currently in good shape for offline generation because it has: +1. Remove smoke-test requirements from the skill workflow. +2. Enforce notebook import of `EMModel` (`from pyem.api import EMModel`) and forbid `scipy.optimize.minimize` usage in notebook templates. +3. 
Remove duplicated template files (`template.json`, `references/template.json`). +4. Enforce flat output layout in one directory (no `pyem/models/...` or `examples/...` paths). +5. Restrict shared utils to exactly: `_alloc_sim`, `_alloc_fit`, `ModelSpec`, `ParamDef`, `spec_to_id`, `build_params`, `PARAM_REGISTRY`. +6. Keep math helper imports (`norm2alpha`, `norm2beta`, `softmax`, `calc_fval`) in model files and document them in `references/pyem-runtime-contract.md`. -- A canonical, local template (`references/template.json`) and a convenience copy (`template.json`). -- Local reference anchors for RL/Bayes/GLM (`references/rl.json`, `references/bayes.json`, `references/glm.json`). -- Self-contained model scaffolding templates (`references/modelclass-utils-template.py`, `references/model-file-template.py`) with shared helper contracts. -- Notebook generation templates and parameter-recovery guidance (`references/example-notebook-template.json`, `references/parameter-recovery-notebook.md`). -- Runtime math/objective contracts in `references/pyem-runtime-contract.md`. 
- -## Primary template - -- `references/template.json` (canonical template) -- `template.json` (copy of canonical template for convenience) - -## Reference anchors +## Current references - `references/rl.json` - `references/bayes.json` - `references/glm.json` - -## Model-class utility layout - -Generated model classes should include: - -- `pyem/models/{model_class}_utils.py` -- one or more `pyem/models/{model_name}.py` - -Each model file should follow this shared import contract: - -```python -from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params -``` - -Each generated `{model_name}.py` should define: - -- attributes: `mod_desc`, `mod_spec`, `mod_id`, `MODEL` -- functions: `mod_params`, `mod_sim`, `mod_fit` - -## Offline resources - -This skill is self-contained and does not require repository model files: - - `references/modelclass-utils-template.py` - `references/model-file-template.py` - `references/example-notebook-template.json` - `references/parameter-recovery-notebook.md` - `references/pyem-runtime-contract.md` -## Example prompts - -Use the same prompt structure for all tasks: +## Output contract -1. **Context** (task design and data-generating process) -2. **Model requirements** (equations, variants, parameter set) -3. **Output contract** (utils file, model file(s), notebook) -4. **Clarification instruction** (ask questions before generating if ambiguous) +Generate all files in the same directory: -### 1) Reversal learning (Kalman filter RL) +- `modclass_utils.py` +- `{model_name}.py` +- `{model_name}.ipynb` -```text -Use pyem-model-generator. +Each `{model_name}.py` must define: -Context: -Reversal learning RL task with two options A (80% reward) and B (20% reward), 2 blocks, 40 trials per block, reversals every 10 trials. Per trial: choose A/B, then observe reward (+1) or no reward (0). 
+- `mod_desc`, `mod_spec`, `mod_id`, `MODEL` +- `mod_params`, `mod_sim`, `mod_fit` -Model requirements: -Implement a Kalman filter RL model and fit the same model to simulated behavior. +Each notebook must use: -Output contract: -- pyem/models/{model_class}_utils.py -- pyem/models/{model_name}.py with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit -- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery plots - -If any details are ambiguous, ask follow-up questions before generating files. -``` - -### 2) Two-step task (model-free, model-based, hybrid) - -```text -Use pyem-model-generator. - -Context: -Two-step task with two stages per trial. -- Stage 1: choose between two first-stage options. -- Transition structure: common transition p=0.70, rare transition p=0.30, fixed mapping to two second-stage states. -- Stage 2: choose between two options in reached state and observe reward/no reward. -- Four second-stage reward probabilities drift independently by bounded Gaussian random walks in [0.25, 0.75]. - -Model requirements: -Generate three variants: -1) Model-free SARSA(lambda) with parameters alpha_1, alpha_2, lambda, beta_1, beta_2, p. -2) Model-based learner using known transition structure and prospective first-stage valuation. -3) Hybrid learner with Q_net = w*Q_MB + (1-w)*Q_TD and parameters beta_1, beta_2, alpha_1, alpha_2, lambda, p, w. - -Output contract: -- pyem/models/{model_class}_utils.py -- pyem/models/{model_name}.py (or one file per variant) with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit -- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery comparison across variants - -If any details are ambiguous, ask follow-up questions before generating files. -``` - -### 3) Social signals task with variants - -```text -Use pyem-model-generator. - -Context: -Social signals task. 
On each trial, participants see options A/B/C, choose one option (store in subject-block-trial arrays), observe clear signal (+1) or not (0), then receive social feedback (thumbs up/down). Use 100 agents, 4 blocks, 12 trials per block. - -Model requirements: -Use equations: -Q_self[s,b,t+1,c] = Q_self[s,b,t,c] + alpha_self * (outcome_self[s,b,t] - Q_self[s,b,t,c]) -Q_other[s,b,t+1,c] = Q_other[s,b,t,c] + alpha_other * (outcome_other[s,b,t] - Q_other[s,b,t,c]) -outcome_self in {0,1} -outcome_other = social_sensitivity_pos*1 (if positive) or social_sensitivity_neg*-1 (if negative), labeled theta -p(choice) = softmax(beta * (w_self*Q_self[s,b,t,c] + w_other*Q_other[s,b,t,c])) -Variants: -- 1b2w2a -- 1b2w2a2t -- 1b2w2a4t -- arbitration variants 1b1o2a4t, 1b1o1a4t, 1b1o2a2t, 1b1o2a with - p(choice) = (1-omega)*softmax(beta*Q_self[s,b,t,c]) + omega*softmax(beta*Q_other[s,b,t,c]) - -Output contract: -- pyem/models/{model_class}_utils.py -- pyem/models/{model_name}.py (or one file per variant) with mod_desc, mod_spec, mod_id, MODEL and functions mod_params, mod_sim, mod_fit -- examples/{model_class}.ipynb with simulation, fitting, and parameter-recovery plots for each variant - -If any details are ambiguous, ask follow-up questions before generating files. +```python +from pyem.api import EMModel ``` diff --git a/skills/pyem-model-generator/SKILL.md b/skills/pyem-model-generator/SKILL.md index d8f8199..1f67663 100644 --- a/skills/pyem-model-generator/SKILL.md +++ b/skills/pyem-model-generator/SKILL.md @@ -1,15 +1,14 @@ --- name: pyem-model-generator -description: Generate new computational cognitive model modules and example notebooks for pyEM-style workflows, including from free-text task/model descriptions. Use this skill to scaffold model-class shared utilities (`modclass_utils.py`), per-model files with `mod_desc/mod_spec/mod_id/MODEL`, and parameter-recovery notebooks when adding models not present in base packages. 
+description: Generate standalone computational cognitive model modules and example notebooks from free-text or reference specs, using a shared `modclass_utils.py` contract and per-model files with `mod_desc`, `mod_spec`, `mod_id`, `MODEL`, `mod_params`, `mod_sim`, and `mod_fit`. --- # pyem-model-generator -Generate standalone model code and notebooks from bundled references only. +Generate all outputs into the **current working directory** (flat layout). -## Required resources (always local to this skill) +## Required local references -- `references/template.json` - `references/rl.json` - `references/bayes.json` - `references/glm.json` @@ -19,90 +18,79 @@ Generate standalone model code and notebooks from bundled references only. - `references/parameter-recovery-notebook.md` - `references/pyem-runtime-contract.md` -Do not assume repository model files or installed pyem package are available. +Do not require repository path conventions like `pyem/models/...` or `examples/...`. -## Clarification behavior +## Output layout (flat) + +Write files in one directory: + +- `modclass_utils.py` +- `{model_name}.py` (one or more model files) +- `{model_name}.ipynb` (or one notebook per model class) -If required information is missing, ask concise follow-up questions before generation. +## Clarification behavior -Required confirmations: +If required details are missing, ask concise follow-up questions before generation: -1. `model_class`, `model_name`, and output paths. -2. Task structure (`nsubjects`, `nblocks`, `ntrials`, choice count). -3. Parameter names, transforms, bounds, and priors. -4. Update equations and choice rule. -5. Variant definitions and naming. -6. Notebook requirements (recovery metrics and plots). +1. Task structure (`nsubjects`, `nblocks`, `ntrials`, choices, outcomes). +2. Parameter names/transforms/bounds/priors. +3. Equations (state update and choice rule). +4. Variant list and naming. +5. Desired output filenames. 
## Free-text parsing workflow -When the user gives prose/equations instead of structured JSON: +When given prose/equations: -1. Place original text in `description_input.raw_text`. -2. Parse into `description_input.extracted_spec`: - - task flow and outcomes, - - tensor shapes, - - update equations, - - choice rule(s), - - variant rules. -3. Normalize symbols into valid Python names. -4. Ask targeted questions for ambiguities (sign conventions, variant toggles, data keys). -5. Preserve equation intent when generating `mod_sim` and `mod_fit`. +1. Extract task flow, tensors, equations, and variants. +2. Normalize symbol names to valid Python variables. +3. Preserve equation intent in `mod_sim`/`mod_fit`. +4. Resolve ambiguities via targeted questions. -## Model-class utility heuristic (required) +## Shared utility heuristic (required) -Generate shared utility module first, then model files: +Create one shared `modclass_utils.py` file containing only: -- Shared module: `pyem/models/{model_class}_utils.py` -- Model module(s): `pyem/models/{model_name}.py` +- `_alloc_sim` +- `_alloc_fit` +- `ModelSpec` +- `ParamDef` +- `spec_to_id` +- `build_params` +- `PARAM_REGISTRY` -Each model file must import shared helpers using this contract: +Each `{model_name}.py` should import shared helpers with: ```python from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params ``` -Shared helper expectations: - -- `_alloc_sim` / `_alloc_fit`: tensor allocation. -- `ModelSpec`: model metadata registration. -- `spec_to_id`: deterministic model ID from `mod_spec`. -- `build_params`: parameter initialization and transforms. - ## Per-model file contract -Each generated `{model_name}.py` should include: +Each generated `{model_name}.py` must include: - attributes: `mod_desc`, `mod_spec`, `mod_id`, `MODEL` - functions: `mod_params`, `mod_sim`, `mod_fit` -Use `references/model-file-template.py` as the base pattern. 
+Each model file should import math helpers directly from pyem: -## Notebook generation contract +```python +from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval +``` -Generate `examples/{model_class}.ipynb` from `references/example-notebook-template.json`. +## Notebook requirements -Required notebook flow: +Generate notebook from `references/example-notebook-template.json` and ensure it imports: -1. model/task overview markdown -2. parameter setup -3. simulation run -4. fit/recovery run -5. parameter recovery plots (identity line + correlation) +```python +from pyem.api import EMModel +``` -See `references/parameter-recovery-notebook.md` for section and plotting details. +Do not use `from scipy.optimize import minimize` in generated notebooks. ## Generation steps -1. Load `references/template.json`. -2. Merge user inputs (or parsed free-text spec) into template fields. -3. Generate `modclass_utils.py` from `references/modelclass-utils-template.py`. -4. Generate `{model_name}.py` from `references/model-file-template.py` with required attributes/functions. -5. Generate notebook from `references/example-notebook-template.json`. -6. Run smoke checks on generated code/notebook cells when execution is requested. - -## Smoke checks - -- Import generated utils and model modules. -- Run `mod_params`, `mod_sim`, and `mod_fit(output="npl")` on minimal synthetic data. -- Verify notebook cells execute through recovery plotting. +1. Select the closest anchor from `references/rl.json`, `references/bayes.json`, `references/glm.json`. +2. Generate `modclass_utils.py` from `references/modelclass-utils-template.py`. +3. Generate each `{model_name}.py` from `references/model-file-template.py`. +4. Generate notebook(s) from `references/example-notebook-template.json` and `references/parameter-recovery-notebook.md`. 
diff --git a/skills/pyem-model-generator/references/bayes.json b/skills/pyem-model-generator/references/bayes.json index acd32f9..51cdfc3 100644 --- a/skills/pyem-model-generator/references/bayes.json +++ b/skills/pyem-model-generator/references/bayes.json @@ -1,30 +1,18 @@ { "model_class": "bayes", + "utils_file": "modclass_utils.py", "models": [ { "model_name": "bayes_fish", - "model_file": "pyem/models/bayes_fish.py", - "model_file_contract": { - "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], - "functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" - }, - "mod_spec": { - "rl": { - "softmax": ["beta"] - }, - "link": { - "bayes": ["lambda1"] - } - }, + "model_file": "bayes_fish.py", + "notebook_file": "bayes_fish.ipynb", + "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "required_functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["beta", "lambda1"], "sim_outputs": ["params", "choices", "observations", "posterior", "nll"], - "fit_outputs": { - "scalar": ["npl", "nll"], - "all": ["params", "posterior", "nll"] - } + "fit_outputs": ["npl", "nll", "all"] } - ], - "utils_file": "pyem/models/bayes_utils.py", - "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] + ] } diff --git a/skills/pyem-model-generator/references/example-notebook-template.json b/skills/pyem-model-generator/references/example-notebook-template.json index c5c6639..1c42905 100644 --- a/skills/pyem-model-generator/references/example-notebook-template.json +++ b/skills/pyem-model-generator/references/example-notebook-template.json @@ -27,7 +27,7 @@ "import numpy as np\\n", "import matplotlib.pyplot as plt\\n", 
"from pyem.api import EMModel\\n", - "from pyem.models.{model_class} import {model_name}_sim, {model_name}_fit" + "from {model_name} import mod_sim, mod_fit" ] }, { @@ -46,15 +46,15 @@ { "cell_type": "code", "source": [ - "sim = {model_name}_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", + "sim = mod_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", "sim.keys()" ] }, { "cell_type": "code", "source": [ - "em = EMModel({model_name}_fit, prior='laplace')\\n", - "recovery = em.recover(sim, {model_name}_fit, n_jobs=1)\\n", + "em = EMModel(mod_fit, prior='laplace')\\n", + "recovery = em.recover(sim, mod_fit, n_jobs=1)\\n", "recovery.keys()" ] }, diff --git a/skills/pyem-model-generator/references/glm.json b/skills/pyem-model-generator/references/glm.json index 5154a56..3e174bd 100644 --- a/skills/pyem-model-generator/references/glm.json +++ b/skills/pyem-model-generator/references/glm.json @@ -1,27 +1,18 @@ { "model_class": "glm", + "utils_file": "modclass_utils.py", "models": [ { "model_name": "glm_linear", - "model_file": "pyem/models/glm_linear.py", - "model_file_contract": { - "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], - "functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" - }, - "mod_spec": { - "cr": { - "linear": ["w0", "w1", "sigma"] - } - }, + "model_file": "glm_linear.py", + "notebook_file": "glm_linear.ipynb", + "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "required_functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["w0", "w1", "sigma"], "sim_outputs": ["params", "X", "y", "pred", "nll"], - "fit_outputs": { - "scalar": ["npl", "nll"], - "all": ["params", "pred", "nll", "bic"] - } + 
"fit_outputs": ["npl", "nll", "all"] } - ], - "utils_file": "pyem/models/glm_utils.py", - "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] + ] } diff --git a/skills/pyem-model-generator/references/model-file-template.py b/skills/pyem-model-generator/references/model-file-template.py index 324a7d1..1f46a18 100644 --- a/skills/pyem-model-generator/references/model-file-template.py +++ b/skills/pyem-model-generator/references/model-file-template.py @@ -1,18 +1,15 @@ -"""Template for one generated model module within a model class.""" +"""Template for one generated model module.""" from __future__ import annotations import numpy as np +from pyem.utils.math import calc_fval, norm2alpha, norm2beta, softmax from .modclass_utils import ( ModelSpec, _alloc_fit, _alloc_sim, build_params, - calc_fval, - norm2alpha, - norm2beta, - softmax, spec_to_id, ) diff --git a/skills/pyem-model-generator/references/modelclass-utils-template.py b/skills/pyem-model-generator/references/modelclass-utils-template.py index f005f4a..686efe3 100644 --- a/skills/pyem-model-generator/references/modelclass-utils-template.py +++ b/skills/pyem-model-generator/references/modelclass-utils-template.py @@ -1,7 +1,6 @@ -"""Shared utilities for a model class. +"""Shared utilities for generated model files. -Copy this template to `pyem/models/{modclass}_utils.py` (or equivalent output -path) and customize parameter registries/allocation fields as needed. +Keep this file lightweight and shared across all generated model modules. 
""" from __future__ import annotations @@ -33,38 +32,8 @@ class ParamDef: init_fn: Callable -def norm2alpha(x: float | np.ndarray) -> float | np.ndarray: - """Map unconstrained real values to (0, 1).""" - return 1.0 / (1.0 + np.exp(-x)) - - -def norm2beta(x: float | np.ndarray) -> float | np.ndarray: - """Map unconstrained real values to (1e-5, 20].""" - return 1e-5 + (20.0 - 1e-5) / (1.0 + np.exp(-x)) - - -def softmax(values: np.ndarray, beta: float) -> np.ndarray: - """Compute numerically stable softmax(beta * values).""" - z = beta * (values - np.max(values)) - exp_z = np.exp(z) - return exp_z / np.sum(exp_z) - - -def calc_fval(nll: float, params: np.ndarray, prior=None, output: str = "npl") -> float: - """Return objective value expected by generated fit functions.""" - if output == "nll" or prior is None: - return float(nll) - if output == "npl": - # lightweight Gaussian prior support - mu = np.asarray(prior.get("mu", np.zeros_like(params)), dtype=float) - sigma = np.asarray(prior.get("sigma", np.ones_like(params)), dtype=float) - log_prior = -0.5 * np.sum(((params - mu) / sigma) ** 2) - return float(nll - log_prior) - raise ValueError("output must be 'npl' or 'nll'") - - def spec_to_id(spec: dict) -> str: - """Convert a spec dictionary into a deterministic model ID string.""" + """Convert a nested spec dictionary into a deterministic ID string.""" block_order = ["rl", "cr", "link"] op_alias = {"linear": "lin"} @@ -117,8 +86,8 @@ def _alloc_fit(nblocks: int, ntrials: int, nchoices: int = 2) -> Dict[str, np.nd PARAM_REGISTRY = { - "beta": ParamDef("beta", norm2beta, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), - "alpha": ParamDef("alpha", norm2alpha, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), + "beta": ParamDef("beta", lambda x: x, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), + "alpha": ParamDef("alpha", lambda x: x, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), } @@ -127,7 +96,7 @@ def build_params( nsubj: int, rng: np.random.Generator | None = None, ) 
-> tuple[list[str], list[Callable], np.ndarray]: - """Build transformed parameter metadata and sampled true params.""" + """Build parameter transforms and sampled true params.""" if rng is None: rng = np.random.default_rng() diff --git a/skills/pyem-model-generator/references/parameter-recovery-notebook.md b/skills/pyem-model-generator/references/parameter-recovery-notebook.md index c43e3cc..77efdf3 100644 --- a/skills/pyem-model-generator/references/parameter-recovery-notebook.md +++ b/skills/pyem-model-generator/references/parameter-recovery-notebook.md @@ -1,6 +1,6 @@ # Parameter recovery notebook pattern -Use this reference to implement `examples/{model_class}.ipynb` even when base example notebooks are unavailable. +Use this reference to implement `{model_name}.ipynb` in a flat output directory, even when base example notebooks are unavailable. This pattern follows the bundled offline templates and anchor specs (`references/rl.json`, `references/bayes.json`, `references/glm.json`) so notebook generation does not require repository access: @@ -22,7 +22,7 @@ This pattern follows the bundled offline templates and anchor specs (`references ## Template source -Use `references/example-notebook-template.json` as the base cell template. Replace all placeholders (for example `{model_name}`, `{model_class}`, bounds, and parameter names). +Use `references/example-notebook-template.json` as the base cell template. Replace all placeholders (for example `{model_name}`, bounds, and parameter names). 
## Minimal recovery workflow diff --git a/skills/pyem-model-generator/references/pyem-runtime-contract.md b/skills/pyem-model-generator/references/pyem-runtime-contract.md index bcce0e5..64e235c 100644 --- a/skills/pyem-model-generator/references/pyem-runtime-contract.md +++ b/skills/pyem-model-generator/references/pyem-runtime-contract.md @@ -1,73 +1,42 @@ -# pyEM runtime contract (offline reference) +# Runtime contract for generated models -Use this file when the full `pyem` package is unavailable. It defines minimal contracts needed to generate compatible model modules. - -## Expected utility imports - -Preferred import in generated model files: +Generated model files should import math helpers directly from pyem: ```python -from ..utils.math import softmax, norm2alpha, norm2beta, calc_fval +from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval ``` -## Utility behavior +The shared `modclass_utils.py` file should **not** define these math helpers. +It should only provide: -### `softmax(values, beta)` +- `_alloc_sim` +- `_alloc_fit` +- `ModelSpec` +- `ParamDef` +- `spec_to_id` +- `build_params` +- `PARAM_REGISTRY` -- Inputs: - - `values`: 1D array-like action values. - - `beta`: inverse temperature (`> 0`). -- Output: - - Probability vector matching `values` length. -- Stable form: +## Function contracts -```python -z = beta * (values - np.max(values)) -exp_z = np.exp(z) -p = exp_z / np.sum(exp_z) -``` +## `mod_params(nsubj, rng=None)` -### `norm2alpha(x)` +- Returns `(param_names, param_xform, true_params)`. +- `true_params` shape: `(nsubj, nparams)`. -- Maps unconstrained real `x` to `(0, 1)`. -- Logistic form is acceptable: +## `mod_sim(params, ..., **kwargs)` -```python -alpha = 1.0 / (1.0 + np.exp(-x)) -``` - -### `norm2beta(x)` +- Returns dictionary with stable keys (at least params/choices/state/nll keys). +- Uses natural-space parameters for simulation unless otherwise specified. -- Maps unconstrained real `x` to `(1e-5, 20]`. 
-- Compatible bounded-sigmoid form: - -```python -beta = 1e-5 + (20.0 - 1e-5) / (1.0 + np.exp(-x)) -``` - -### `calc_fval(nll, params, prior=None, output="npl")` - -- `output="nll"`: return `nll`. -- `output="npl"`: return `nll - log_prior(params)` if prior exists; else `nll`. -- `output="all"`: typically handled by caller model function. - -## Prior contract - -Use a lightweight prior dictionary that can be passed through unchanged to pyEM: - -```python -prior = { - "mu": np.array([...]), - "sigma": np.array([...]), -} -``` +## `mod_fit(params, ..., prior=None, output="npl")` -If prior shape mismatches params, return a large penalty value (commonly `1e7`). +- Must support `output="npl"`, `"nll"`, and optionally `"all"`. +- Uses transformed parameters (`norm2alpha`, `norm2beta`) when constraints require. +- Returns large penalty (commonly `1e7`) for invalid parameter regions. +- Uses `calc_fval` for scalar objective outputs. -## Fit function contract +## Prior handling -- Signature pattern: - - `{model_name}_fit(params, *, prior=None, output="npl", **kwargs)` -- Must support at least `output in {"npl", "nll", "all"}`. -- Must return scalar for `"npl"`/`"nll"`. -- For invalid transformed params, return `1e7`. +- Prior can be `None` or a dictionary accepted by `calc_fval`. +- Pass prior through unchanged to `calc_fval`. 
diff --git a/skills/pyem-model-generator/references/rl.json b/skills/pyem-model-generator/references/rl.json index 95e7b49..e77cb6b 100644 --- a/skills/pyem-model-generator/references/rl.json +++ b/skills/pyem-model-generator/references/rl.json @@ -1,28 +1,18 @@ { "model_class": "rl", + "utils_file": "modclass_utils.py", "models": [ { "model_name": "rw1a1b", - "model_file": "pyem/models/rw1a1b.py", - "model_file_contract": { - "attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], - "functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" - }, - "mod_spec": { - "rl": { - "softmax": ["beta"], - "rw": ["alpha"] - } - }, + "model_file": "rw1a1b.py", + "notebook_file": "rw1a1b.ipynb", + "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], + "required_functions": ["mod_params", "mod_sim", "mod_fit"], + "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["beta", "alpha"], "sim_outputs": ["params", "choices", "ev", "ch_prob", "pe", "nll"], - "fit_outputs": { - "scalar": ["npl", "nll"], - "all": ["params", "ev", "ch_prob", "pe", "nll"] - } + "fit_outputs": ["npl", "nll", "all"] } - ], - "utils_file": "pyem/models/rl_utils.py", - "utils_required_symbols": ["_alloc_sim", "_alloc_fit", "ModelSpec", "spec_to_id", "build_params"] + ] } diff --git a/skills/pyem-model-generator/references/template.json b/skills/pyem-model-generator/references/template.json deleted file mode 100644 index 7a3110a..0000000 --- a/skills/pyem-model-generator/references/template.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "version": "2.0", - "model_class": "", - "model_name": "", - "paths": { - "utils_file": "pyem/models/{model_class}_utils.py", - "model_file": "pyem/models/{model_name}.py", - "notebook_file": 
"examples/{model_class}.ipynb" - }, - "model_file_contract": { - "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], - "required_functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params" - }, - "task": { - "nsubjects": 100, - "nblocks": 4, - "ntrials": 12, - "nchoices": 2, - "choice_labels": ["A", "B"], - "outcome_keys": ["rewards"] - }, - "parameters": [ - { - "name": "beta", - "index": 0, - "transform": "norm2beta", - "bounds": [1e-05, 20.0] - }, - { - "name": "alpha", - "index": 1, - "transform": "norm2alpha", - "bounds": [0.0, 1.0] - } - ], - "equations": { - "state_updates": [], - "choice_rule": "softmax(beta * values)" - }, - "variants": { - "requested": false, - "variant_names": [], - "variant_rules": [] - }, - "description_input": { - "raw_text": "", - "extracted_spec": { - "task_flow": [], - "state_tensors": [], - "update_equations": [], - "choice_rule": "", - "variant_rules": [] - } - }, - "notebook": { - "template_file": "references/example-notebook-template.json", - "include_parameter_recovery_plot": true, - "nsubjects": 50, - "random_seed": 123 - }, - "reference_specs": [ - "references/rl.json", - "references/bayes.json", - "references/glm.json" - ] -} diff --git a/skills/pyem-model-generator/template.json b/skills/pyem-model-generator/template.json deleted file mode 100644 index 7a3110a..0000000 --- a/skills/pyem-model-generator/template.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "version": "2.0", - "model_class": "", - "model_name": "", - "paths": { - "utils_file": "pyem/models/{model_class}_utils.py", - "model_file": "pyem/models/{model_name}.py", - "notebook_file": "examples/{model_class}.ipynb" - }, - "model_file_contract": { - "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], - "required_functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, 
ModelSpec, spec_to_id, build_params" - }, - "task": { - "nsubjects": 100, - "nblocks": 4, - "ntrials": 12, - "nchoices": 2, - "choice_labels": ["A", "B"], - "outcome_keys": ["rewards"] - }, - "parameters": [ - { - "name": "beta", - "index": 0, - "transform": "norm2beta", - "bounds": [1e-05, 20.0] - }, - { - "name": "alpha", - "index": 1, - "transform": "norm2alpha", - "bounds": [0.0, 1.0] - } - ], - "equations": { - "state_updates": [], - "choice_rule": "softmax(beta * values)" - }, - "variants": { - "requested": false, - "variant_names": [], - "variant_rules": [] - }, - "description_input": { - "raw_text": "", - "extracted_spec": { - "task_flow": [], - "state_tensors": [], - "update_equations": [], - "choice_rule": "", - "variant_rules": [] - } - }, - "notebook": { - "template_file": "references/example-notebook-template.json", - "include_parameter_recovery_plot": true, - "nsubjects": 50, - "random_seed": 123 - }, - "reference_specs": [ - "references/rl.json", - "references/bayes.json", - "references/glm.json" - ] -} From 08ac1b20881aebf136a0b6d116c07ad17989a58d Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:28:47 -0400 Subject: [PATCH 06/14] Align utils naming and notebook template loading workflow --- skills/pyem-model-generator/SKILL.md | 4 +- .../references/example-notebook-template.json | 46 ++++++++----------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/skills/pyem-model-generator/SKILL.md b/skills/pyem-model-generator/SKILL.md index 1f67663..00ce86d 100644 --- a/skills/pyem-model-generator/SKILL.md +++ b/skills/pyem-model-generator/SKILL.md @@ -24,7 +24,7 @@ Do not require repository path conventions like `pyem/models/...` or `examples/. 
Write files in one directory: -- `modclass_utils.py` +- `{modclass}_utils.py` - `{model_name}.py` (one or more model files) - `{model_name}.ipynb` (or one notebook per model class) @@ -49,7 +49,7 @@ When given prose/equations: ## Shared utility heuristic (required) -Create one shared `modclass_utils.py` file containing only: +Create one shared `{modclass}_utils.py` file containing only: - `_alloc_sim` - `_alloc_fit` diff --git a/skills/pyem-model-generator/references/example-notebook-template.json b/skills/pyem-model-generator/references/example-notebook-template.json index 1c42905..818fa59 100644 --- a/skills/pyem-model-generator/references/example-notebook-template.json +++ b/skills/pyem-model-generator/references/example-notebook-template.json @@ -24,57 +24,49 @@ { "cell_type": "code", "source": [ + "import importlib\\n", "import numpy as np\\n", "import matplotlib.pyplot as plt\\n", - "from pyem.api import EMModel\\n", - "from {model_name} import mod_sim, mod_fit" + "from pyem.api import EMModel" ] }, { "cell_type": "code", "source": [ - "rng = np.random.default_rng({random_seed})\\n", - "nsubjects = {nsubjects}\\n", + "script_fn = \"{model_name}\"\\n", + "nsubj = {nsubjects}\\n", "nblocks = {nblocks}\\n", "ntrials = {ntrials}\\n", - "true_params = np.column_stack([\\n", - " rng.uniform({p1_low}, {p1_high}, nsubjects),\\n", - " rng.uniform({p2_low}, {p2_high}, nsubjects),\\n", - "])" + "module = importlib.import_module(f\"{script_fn}\")\\n", + "MODEL = module.MODEL\\n", + "print(script_fn, end=\"\\n\")\\n", + "print(MODEL.id, end=\"\\n\\n\")\\n", + "print(MODEL.desc, end=\"\\n\\n\")\\n", + "mod_params, mod_sim, mod_fit = MODEL.params, MODEL.sim, MODEL.fit\\n", + "param_names, param_xform, true_params = mod_params(nsubj)" ] }, { "cell_type": "code", "source": [ - "sim = mod_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", - "sim.keys()" + "sim_outp = mod_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", + "sim_data = [[sim_outp['choices'][i, ...], 
sim_outp['ev'][i, ...], sim_outp['pe'][i, ...]] for i in range(nsubj)]\\n", + "len(sim_data)" ] }, { "cell_type": "code", "source": [ - "em = EMModel(mod_fit, prior='laplace')\\n", - "recovery = em.recover(sim, mod_fit, n_jobs=1)\\n", - "recovery.keys()" + "model = EMModel(all_data=sim_data, fit_func=mod_fit, param_names=param_names, param_xform=param_xform)\\n", + "result = model.fit(verbose=1)\\n", + "result" ] }, { "cell_type": "code", "source": [ - "fitted = np.asarray(recovery['mfit'])\\n", - "param_names = {param_names}\\n", - "n_params = true_params.shape[1]\\n", - "fig, axes = plt.subplots(1, n_params, figsize=(4 * n_params, 4))\\n", - "for i, ax in enumerate(np.atleast_1d(axes)):\\n", - " ax.scatter(true_params[:, i], fitted[:, i], alpha=0.7)\\n", - " lo = min(true_params[:, i].min(), fitted[:, i].min())\\n", - " hi = max(true_params[:, i].max(), fitted[:, i].max())\\n", - " ax.plot([lo, hi], [lo, hi], 'k--', linewidth=1)\\n", - " r = np.corrcoef(true_params[:, i], fitted[:, i])[0, 1]\\n", - " ax.set_title(f\"{param_names[i]} (r={r:.2f})\")\\n", - " ax.set_xlabel('True')\\n", - " ax.set_ylabel('Recovered')\\n", - "plt.tight_layout()" + "fig = model.plot_recovery({'true_params': true_params, 'estimated_params': model.outfit['params']})\\n", + "fig" ] } ] From c3428231b602354867e38acc5c68cdd2c88c5fc2 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:29:09 -0400 Subject: [PATCH 07/14] Rewrite skill README for first-time users --- skills/pyem-model-generator/README.md | 81 +++++++++++++++++++++------ 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index 1a1cc42..ee60f87 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -1,17 +1,21 @@ # pyem-model-generator skill -Use this skill to scaffold standalone computational cognitive model files and recovery 
notebooks from reference specs or free-text equations. +Use this skill to generate standalone computational cognitive model code and a matching parameter-recovery notebook. -## Plan to fix known flaws +## What this skill generates -1. Remove smoke-test requirements from the skill workflow. -2. Enforce notebook import of `EMModel` (`from pyem.api import EMModel`) and forbid `scipy.optimize.minimize` usage in notebook templates. -3. Remove duplicated template files (`template.json`, `references/template.json`). -4. Enforce flat output layout in one directory (no `pyem/models/...` or `examples/...` paths). -5. Restrict shared utils to exactly: `_alloc_sim`, `_alloc_fit`, `ModelSpec`, `ParamDef`, `spec_to_id`, `build_params`, `PARAM_REGISTRY`. -6. Keep math helper imports (`norm2alpha`, `norm2beta`, `softmax`, `calc_fval`) in model files and document them in `references/pyem-runtime-contract.md`. +Given a task/model description, the skill generates files in a **single directory**: -## Current references +- `{modclass}_utils.py` +- `{model_name}.py` +- `{model_name}.ipynb` + +The generated model file follows a consistent contract: + +- attributes: `mod_desc`, `mod_spec`, `mod_id`, `MODEL` +- functions: `mod_params`, `mod_sim`, `mod_fit` + +## Required references bundled with the skill - `references/rl.json` - `references/bayes.json` @@ -22,21 +26,62 @@ Use this skill to scaffold standalone computational cognitive model files and re - `references/parameter-recovery-notebook.md` - `references/pyem-runtime-contract.md` -## Output contract +## Quick start (first-time users) -Generate all files in the same directory: +1. Describe your task and model in plain language (or equations). +2. Ask the skill to generate: + - `{modclass}_utils.py` + - `{model_name}.py` + - `{model_name}.ipynb` +3. If details are missing, answer the skill’s follow-up questions. +4. Review generated files and run your analysis workflow. 
-- `modclass_utils.py` -- `{model_name}.py` -- `{model_name}.ipynb` +## Notes on generated files + +### Shared utils file + +`{modclass}_utils.py` should define shared helpers used across model files: -Each `{model_name}.py` must define: +- `_alloc_sim`, `_alloc_fit` +- `ModelSpec`, `ParamDef` +- `spec_to_id`, `build_params` +- `PARAM_REGISTRY` -- `mod_desc`, `mod_spec`, `mod_id`, `MODEL` -- `mod_params`, `mod_sim`, `mod_fit` +### Model file -Each notebook must use: +`{model_name}.py` imports math helpers from pyEM: + +```python +from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval +``` + +And imports shared helpers from: + +```python +from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +``` + +### Notebook file + +The notebook template uses: ```python from pyem.api import EMModel ``` + +and follows a simulation → fit → recovery plot workflow. + +## Example prompt + +```text +Use pyem-model-generator. +Generate standalone files in one directory: +- social_utils.py +- social_rw.py +- social_rw.ipynb + +Task: three-option social learning task with 4 blocks x 12 trials and 100 agents. +Model: dual-value update equations for self and other values with softmax choice. +Include parameter recovery plots in the notebook. +Ask follow-up questions before generation if any details are ambiguous. 
+``` From e76b0cd9db4cf3f5dcfb19d6f8150c70cf6bda14 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:41:09 -0400 Subject: [PATCH 08/14] Update skills/pyem-model-generator/references/example-notebook-template.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../references/example-notebook-template.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/references/example-notebook-template.json b/skills/pyem-model-generator/references/example-notebook-template.json index 818fa59..f87cf46 100644 --- a/skills/pyem-model-generator/references/example-notebook-template.json +++ b/skills/pyem-model-generator/references/example-notebook-template.json @@ -50,7 +50,7 @@ "cell_type": "code", "source": [ "sim_outp = mod_sim(true_params, nblocks=nblocks, ntrials=ntrials)\\n", - "sim_data = [[sim_outp['choices'][i, ...], sim_outp['ev'][i, ...], sim_outp['pe'][i, ...]] for i in range(nsubj)]\\n", + "sim_data = [[sim_outp['choices'][i, ...], sim_outp['rewards'][i, ...]] for i in range(nsubj)]\\n", "len(sim_data)" ] }, From ad2841f414fe24d885132ac1524d78b6e662a3ab Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:43:17 -0400 Subject: [PATCH 09/14] Update skills/pyem-model-generator/references/rl.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- skills/pyem-model-generator/references/rl.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/references/rl.json b/skills/pyem-model-generator/references/rl.json index e77cb6b..0e04d5c 100644 --- a/skills/pyem-model-generator/references/rl.json +++ b/skills/pyem-model-generator/references/rl.json @@ -8,7 +8,7 @@ "notebook_file": "rw1a1b.ipynb", "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], "required_functions": ["mod_params", "mod_sim", 
"mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "shared_import": "from modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["beta", "alpha"], "sim_outputs": ["params", "choices", "ev", "ch_prob", "pe", "nll"], From 64fd2141915ad9b5e8970781965139a0079b6492 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:43:28 -0400 Subject: [PATCH 10/14] Update skills/pyem-model-generator/references/bayes.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- skills/pyem-model-generator/references/bayes.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/references/bayes.json b/skills/pyem-model-generator/references/bayes.json index 51cdfc3..3a048a7 100644 --- a/skills/pyem-model-generator/references/bayes.json +++ b/skills/pyem-model-generator/references/bayes.json @@ -8,7 +8,7 @@ "notebook_file": "bayes_fish.ipynb", "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], "required_functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "shared_import": "from modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["beta", "lambda1"], "sim_outputs": ["params", "choices", "observations", "posterior", "nll"], From 394d5468998bfd11f9fa93c68c2794594923b026 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:43:51 -0400 Subject: [PATCH 11/14] Update skills/pyem-model-generator/references/pyem-runtime-contract.md Co-authored-by: Copilot 
<175728472+Copilot@users.noreply.github.com> --- .../pyem-model-generator/references/pyem-runtime-contract.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/references/pyem-runtime-contract.md b/skills/pyem-model-generator/references/pyem-runtime-contract.md index 64e235c..a4f642b 100644 --- a/skills/pyem-model-generator/references/pyem-runtime-contract.md +++ b/skills/pyem-model-generator/references/pyem-runtime-contract.md @@ -26,7 +26,9 @@ It should only provide: ## `mod_sim(params, ..., **kwargs)` -- Returns dictionary with stable keys (at least params/choices/state/nll keys). +- Returns a dictionary with stable keys appropriate to the model class. +- Common required outputs are `params` and `choices`; include `rewards` when the task/model uses reward feedback or when downstream fitting/diagnostics require it. +- Model-specific latent/diagnostic arrays such as `state`, `ev`, `pe`, and similar traces may be included, but no single latent key is required for all models. - Uses natural-space parameters for simulation unless otherwise specified. 
## `mod_fit(params, ..., prior=None, output="npl")` From 1182f2f40c80dc5dd139aebae14a596fcb0077af Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:44:03 -0400 Subject: [PATCH 12/14] Update skills/pyem-model-generator/SKILL.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- skills/pyem-model-generator/SKILL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/SKILL.md b/skills/pyem-model-generator/SKILL.md index 00ce86d..60e0e4a 100644 --- a/skills/pyem-model-generator/SKILL.md +++ b/skills/pyem-model-generator/SKILL.md @@ -62,7 +62,7 @@ Create one shared `{modclass}_utils.py` file containing only: Each `{model_name}.py` should import shared helpers with: ```python -from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +from {modclass}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params ``` ## Per-model file contract From 43ce4355d264ee14988a6f24100274a87c65b983 Mon Sep 17 00:00:00 2001 From: Shawn Rhoads <24925845+shawnrhoads@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:44:19 -0400 Subject: [PATCH 13/14] Update skills/pyem-model-generator/README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- skills/pyem-model-generator/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/pyem-model-generator/README.md b/skills/pyem-model-generator/README.md index ee60f87..9ed74c5 100644 --- a/skills/pyem-model-generator/README.md +++ b/skills/pyem-model-generator/README.md @@ -58,7 +58,7 @@ from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval And imports shared helpers from: ```python -from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params +from {modclass}_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params ``` ### Notebook file From 
523024636015663bafbc5cf89f04fc11e4dd948f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Apr 2026 00:45:08 +0000 Subject: [PATCH 14/14] Fix relative imports, extend PARAM_REGISTRY, add rewards to sim, update prior contract Agent-Logs-Url: https://github.com/shawnrhoads/pyEM/sessions/3f8576e3-eeb1-4323-92d6-bd7d64320b62 Co-authored-by: shawnrhoads <24925845+shawnrhoads@users.noreply.github.com> --- .../pyem-model-generator/references/glm.json | 2 +- .../references/model-file-template.py | 3 +- .../references/modelclass-utils-template.py | 33 +++++++++++++++++-- .../references/pyem-runtime-contract.md | 7 ++-- .../pyem-model-generator/references/rl.json | 2 +- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/skills/pyem-model-generator/references/glm.json b/skills/pyem-model-generator/references/glm.json index 3e174bd..d0ce16b 100644 --- a/skills/pyem-model-generator/references/glm.json +++ b/skills/pyem-model-generator/references/glm.json @@ -8,7 +8,7 @@ "notebook_file": "glm_linear.ipynb", "required_attributes": ["mod_desc", "mod_spec", "mod_id", "MODEL"], "required_functions": ["mod_params", "mod_sim", "mod_fit"], - "shared_import": "from .modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", + "shared_import": "from modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["w0", "w1", "sigma"], "sim_outputs": ["params", "X", "y", "pred", "nll"], diff --git a/skills/pyem-model-generator/references/model-file-template.py b/skills/pyem-model-generator/references/model-file-template.py index 1f46a18..a19d7e4 100644 --- a/skills/pyem-model-generator/references/model-file-template.py +++ b/skills/pyem-model-generator/references/model-file-template.py @@ -5,7 +5,7 @@ import numpy as np from pyem.utils.math import calc_fval, 
norm2alpha, norm2beta, softmax -from .modclass_utils import ( +from modclass_utils import ( ModelSpec, _alloc_fit, _alloc_sim, @@ -41,6 +41,7 @@ def mod_sim(params: np.ndarray, nblocks: int = 4, ntrials: int = 12, **kwargs): c = rng.choice([0, 1], p=p) r = float(rng.integers(0, 2)) dat["choices"][s, b, t] = "A" if c == 0 else "B" + dat["rewards"][s, b, t] = r dat["ch_prob"][s, b, t, :] = p dat["pe"][s, b, t] = r - dat["ev"][s, b, t, c] dat["ev"][s, b, t + 1, :] = dat["ev"][s, b, t, :] diff --git a/skills/pyem-model-generator/references/modelclass-utils-template.py b/skills/pyem-model-generator/references/modelclass-utils-template.py index 686efe3..82f0ffc 100644 --- a/skills/pyem-model-generator/references/modelclass-utils-template.py +++ b/skills/pyem-model-generator/references/modelclass-utils-template.py @@ -68,6 +68,7 @@ def _alloc_sim(nsubj: int, nblocks: int, ntrials: int, nchoices: int = 2) -> Dic """Allocate common simulation arrays.""" return { "choices": np.zeros((nsubj, nblocks, ntrials), dtype=object), + "rewards": np.zeros((nsubj, nblocks, ntrials), dtype=float), "ev": np.zeros((nsubj, nblocks, ntrials + 1, nchoices), dtype=float), "ch_prob": np.zeros((nsubj, nblocks, ntrials, nchoices), dtype=float), "pe": np.zeros((nsubj, nblocks, ntrials), dtype=float), @@ -85,9 +86,25 @@ def _alloc_fit(nblocks: int, ntrials: int, nchoices: int = 2) -> Dict[str, np.nd } +def _norm2unit(x): + """Map unconstrained Gaussian-space values to the open unit interval.""" + x = np.asarray(x, dtype=float) + return 1.0 / (1.0 + np.exp(-x)) + + +def _norm2pos(x): + """Map unconstrained Gaussian-space values to positive reals.""" + x = np.asarray(x, dtype=float) + return np.exp(x) + + PARAM_REGISTRY = { - "beta": ParamDef("beta", lambda x: x, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), - "alpha": ParamDef("alpha", lambda x: x, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), + "beta": ParamDef("beta", _norm2pos, lambda rng, n: rng.uniform(0.5, 8.0, size=n)), + "alpha": 
ParamDef("alpha", _norm2unit, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), + "lambda1": ParamDef("lambda1", _norm2unit, lambda rng, n: rng.uniform(0.1, 0.9, size=n)), + "w0": ParamDef("w0", lambda x: x, lambda rng, n: rng.normal(0.0, 1.0, size=n)), + "w1": ParamDef("w1", lambda x: x, lambda rng, n: rng.normal(0.0, 1.0, size=n)), + "sigma": ParamDef("sigma", _norm2pos, lambda rng, n: rng.uniform(0.1, 2.0, size=n)), } @@ -96,7 +113,17 @@ def build_params( nsubj: int, rng: np.random.Generator | None = None, ) -> tuple[list[str], list[Callable], np.ndarray]: - """Build parameter transforms and sampled true params.""" + """Build parameter transforms and sampled true params. + + Returns a 3-tuple: + - param_names: list of parameter name strings + - param_xform: list of callables that map optimizer outputs (Gaussian / + unconstrained space) to natural-space parameter values; passed directly + to ``EMModel(param_xform=...)`` + - true_params: ``(nsubj, nparams)`` array of natural-space ground-truth + parameters generated by each ``ParamDef.init_fn``; used directly in + ``mod_sim`` and as the reference for recovery comparisons + """ if rng is None: rng = np.random.default_rng() diff --git a/skills/pyem-model-generator/references/pyem-runtime-contract.md b/skills/pyem-model-generator/references/pyem-runtime-contract.md index a4f642b..86cb4c9 100644 --- a/skills/pyem-model-generator/references/pyem-runtime-contract.md +++ b/skills/pyem-model-generator/references/pyem-runtime-contract.md @@ -28,8 +28,7 @@ It should only provide: - Returns a dictionary with stable keys appropriate to the model class. - Common required outputs are `params` and `choices`; include `rewards` when the task/model uses reward feedback or when downstream fitting/diagnostics require it. -- Model-specific latent/diagnostic arrays such as `state`, `ev`, `pe`, and similar traces may be included, but no single latent key is required for all models. 
-- Uses natural-space parameters for simulation unless otherwise specified. +- Model-specific latent/diagnostic arrays such as `ev`, `pe`, and similar traces may be included, but no single latent key is required for all models. ## `mod_fit(params, ..., prior=None, output="npl")` @@ -40,5 +39,5 @@ It should only provide: ## Prior handling -- Prior can be `None` or a dictionary accepted by `calc_fval`. -- Pass prior through unchanged to `calc_fval`. +- Prior can be `None` or a prior-like object that implements a `logpdf` method compatible with `calc_fval`. +- Pass the prior object through unchanged to `calc_fval`. diff --git a/skills/pyem-model-generator/references/rl.json b/skills/pyem-model-generator/references/rl.json index 0e04d5c..48fb2a5 100644 --- a/skills/pyem-model-generator/references/rl.json +++ b/skills/pyem-model-generator/references/rl.json @@ -11,7 +11,7 @@ "shared_import": "from modclass_utils import _alloc_sim, _alloc_fit, ModelSpec, spec_to_id, build_params", "math_import": "from pyem.utils.math import norm2alpha, norm2beta, softmax, calc_fval", "parameters": ["beta", "alpha"], - "sim_outputs": ["params", "choices", "ev", "ch_prob", "pe", "nll"], + "sim_outputs": ["params", "choices", "rewards", "ev", "ch_prob", "pe", "nll"], "fit_outputs": ["npl", "nll", "all"] } ]