PunchTheDev · PunchTheDev · Jun 3, 2026 · Jun 3, 2026
diff --git a/agents/template/agent.py b/agents/template/agent.py
@@ -25,8 +25,8 @@ def generate(spec: dict, llm: LLMClient) -> bytes:
     """
     Build and return a STEP file for the given spec.
 
-    Use `llm.chat(messages)` to call the whitelisted LLM
-    (claude-haiku-4-5, claude-3-5-haiku, or gpt-4o-mini).
+    Use `llm.chat(messages)` to call the whitelisted LLM.
+    Allowed models are listed in config/model-whitelist.txt.
 
     Args:
         spec: Problem specification dict. Key fields:

diff --git a/cli.py b/cli.py
@@ -730,10 +730,15 @@ def _run_evaluate(agent_path: str, spec_path: str, verbose: bool) -> dict:
     # Inherit environment; supply defaults so LLM agents work without extra setup.
     env = os.environ.copy()
     env.setdefault("FORGE_MODEL", "anthropic/claude-haiku-4-5")
-    env.setdefault(
-        "FORGE_MODEL_WHITELIST",
-        "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini",
-    )
+    wl_path = ROOT / "config" / "model-whitelist.txt"
+    if wl_path.exists():
+        wl = ",".join(
+            l.strip() for l in wl_path.read_text().splitlines()
+            if l.strip() and not l.strip().startswith("#")
+        )
+    else:
+        wl = "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini"
+    env.setdefault("FORGE_MODEL_WHITELIST", wl)
     try:
         proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT), env=env)
     except FileNotFoundError:

diff --git a/scripts/run_eval_pool.py b/scripts/run_eval_pool.py
@@ -21,10 +21,23 @@
 agent = os.environ["AGENT_PATH"]
 llm_key = os.environ.get("FORGE_LLM_KEY", "")
 model = os.environ.get("FORGE_MODEL", "anthropic/claude-haiku-4-5")
-whitelist = os.environ.get(
-    "FORGE_MODEL_WHITELIST",
-    "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini",
-)
+def _load_whitelist_default() -> str:
+    """Return the default model whitelist as a comma-separated string.
+
+    Reads config/model-whitelist.txt, located relative to the parent of this
+    script's directory, skipping blank lines and lines that start with "#".
+    Falls back to the built-in list when the file cannot be opened or read.
+    """
+    wl_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config", "model-whitelist.txt")
+    try:
+        # Context manager closes the handle promptly instead of relying on GC.
+        with open(wl_path) as fh:
+            entries = [line.strip() for line in fh.read().splitlines()]
+        return ",".join(e for e in entries if e and not e.startswith("#"))
+    except OSError:
+        return "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini"
+
+whitelist = os.environ.get("FORGE_MODEL_WHITELIST") or _load_whitelist_default()
 workspace = os.getcwd()
 
 results = []

diff --git a/scripts/run_hidden_eval.py b/scripts/run_hidden_eval.py
@@ -37,10 +37,26 @@
 COMMIT_HASH = os.environ.get("COMMIT_HASH", "unknown")
 LLM_KEY = os.environ.get("FORGE_LLM_KEY", "")
 MODEL = os.environ.get("FORGE_MODEL", "anthropic/claude-haiku-4-5")
-WHITELIST = os.environ.get(
-    "FORGE_MODEL_WHITELIST",
-    "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini",
-)
+
+
+def _load_whitelist_default() -> str:
+    """Return the default model whitelist as a comma-separated string.
+
+    Reads config/model-whitelist.txt, located relative to the parent of this
+    script's directory, skipping blank lines and lines that start with "#".
+    Falls back to the built-in list when the file cannot be opened or read.
+    """
+    wl_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config", "model-whitelist.txt")
+    try:
+        # Context manager closes the handle promptly instead of relying on GC.
+        with open(wl_path) as fh:
+            entries = [line.strip() for line in fh.read().splitlines()]
+        return ",".join(e for e in entries if e and not e.startswith("#"))
+    except OSError:
+        return "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini"
+
+
+WHITELIST = os.environ.get("FORGE_MODEL_WHITELIST") or _load_whitelist_default()
 PUBLIC_RESULTS_JSON = os.environ.get("PUBLIC_RESULTS_JSON", "")
 
 ROUNDS = ["round_001", "round_002", "round_003"]