From 8ef6b3ac9fc954513fd9e14af9528dc3f85b2f93 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 16:40:29 +0000
Subject: [PATCH 1/9] Add pub-bracket-v24: minimum hollow arm fw=3.6mm for
 pub_001, target ~20.7g
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v21b runs at only 62% stress utilisation (15.55 MPa of 25 MPa) with fw=8mm.
Reduce arm width to minimum hollow (fw=3×mw=3.6mm). Analytical σ=21.5 MPa,
FEA estimate 18.4 MPa (0.855× ratio from v21b). Frame plate unchanged.

Also applies v22's tip-cap fix: inner ends at arm_len-mw to prevent
open-edge stress concentration.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 agents/pub-bracket-v24/agent.py | 131 ++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 agents/pub-bracket-v24/agent.py

diff --git a/agents/pub-bracket-v24/agent.py b/agents/pub-bracket-v24/agent.py
new file mode 100644
index 0000000..cd69898
--- /dev/null
+++ b/agents/pub-bracket-v24/agent.py
@@ -0,0 +1,131 @@
+"""
+pub-bracket-v24: Minimum hollow arm + frame plate for pub_001_medium.
+
+Improvement over v21b: reduces fw from 8mm to 3×mw=3.6mm (minimum hollow
+cross-section) while keeping the proven frame plate and geometry from v21b.
+
+Structural analysis (pub_001: F=464.48N, fw=3.6mm, mw=1.2mm, h=67mm):
+  M = F × arm_len = 464.48 × 87.3 = 40,553 N·mm
+  I = (3.6×67³ − 1.2×64.6³) / 12 = 63,270 mm⁴
+  c = 33.5 mm
+  σ_analytical = 40,553 × 33.5 / 63,270 = 21.5 MPa
+  FEA/analytical ratio from v21b: 15.55/18.2 = 0.855
+  σ_FEA_estimate = 0.855 × 21.5 = 18.4 MPa → 73.6% of 25 MPa ✓
+
+Mesh clearance: inner ceiling = h − mw = 65.8mm, load_z = 38.6mm → 27.2mm >> 10mm ✓
+Arm tip cap: inner ends at arm_len−mw so tip is inherently solid (learned from v22).
+Frame plate: unchanged from v21b (2 interior voids for 3×2 bolt grid).
+
+Estimated mass ≈ 20.7 g (−7.8% from v21b at 22.44 g).
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+
+
+def generate(spec: dict) -> bytes:
+    """Return STEP bytes for pub-bracket-v24 (fw=3.6mm arm + frame plate, pub_001)."""
+    from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse
+    from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder
+    from OCP.Interface import Interface_Static
+    from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer
+    from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt
+
+    c        = spec["constraints"]
+    bolts    = c["bolt_pattern_mm"]
+    bolt_d   = c["bolt_diameter_clearance_mm"]
+    lp       = c["load_point_mm"]
+    min_wall = c.get("min_wall_thickness_mm", 1.2)
+    bv       = c["build_volume_mm"]
+
+    lx, ly, lz = lp[0], lp[1], lp[2]
+
+    by_cols = sorted(set(p[0] for p in bolts))
+    bz_rows = sorted(set(p[1] for p in bolts))
+    bolt_r  = bolt_d / 2.0
+    strip_w = bolt_r + min_wall
+
+    # Plate margins — identical to v21b
+    bolt_y_span  = max(by_cols) - min(by_cols)
+    bolt_z_span  = max(bz_rows) - min(bz_rows)
+    bvy          = bv[1]
+    bvz          = bv[2]
+    y_half_avail = (bvy - bolt_y_span) / 2.0 - 0.5
+    z_half_avail = (bvz - bolt_z_span) / 2.0 - 0.5
+    margin_y     = min(strip_w, max(0.0, y_half_avail))
+    margin_z     = min(strip_w, max(0.0, z_half_avail))
+    MIN_SLIVER   = 0.5
+    if 0.0 < margin_y - bolt_r < MIN_SLIVER:
+        margin_y = bolt_r - MIN_SLIVER
+    if 0.0 < margin_z - bolt_r < MIN_SLIVER:
+        margin_z = bolt_r - MIN_SLIVER
+
+    plate_t  = min_wall
+    plate_y0 = min(by_cols) - margin_y
+    plate_y1 = max(by_cols) + margin_y
+    plate_z0 = min(bz_rows) - margin_z
+    plate_z1 = max(bz_rows) + margin_z
+
+    # Grid frame plate — same void-cut logic as v21b
+    plate = BRepPrimAPI_MakeBox(
+        gp_Pnt(0.0,     plate_y0, plate_z0),
+        gp_Pnt(plate_t, plate_y1, plate_z1),
+    ).Shape()
+
+    col_gap_starts = [y + strip_w for y in by_cols[:-1]]
+    col_gap_ends   = [y - strip_w for y in by_cols[1:]]
+    row_gap_starts = [z + strip_w for z in bz_rows[:-1]]
+    row_gap_ends   = [z - strip_w for z in bz_rows[1:]]
+
+    for y0, y1 in zip(col_gap_starts, col_gap_ends):
+        for z0, z1 in zip(row_gap_starts, row_gap_ends):
+            if y0 < y1 and z0 < z1:
+                void = BRepPrimAPI_MakeBox(
+                    gp_Pnt(0.0,     y0, z0),
+                    gp_Pnt(plate_t, y1, z1),
+                ).Shape()
+                cut  = BRepAlgoAPI_Cut(plate, void)
+                cut.Build()
+                plate = cut.Shape()
+
+    # Arm: fw = 3×mw = 3.6mm (minimum hollow cross-section)
+    # Inner cavity ends at arm_len−mw (solid tip cap, from v22 learning)
+    fw      = 3.0 * min_wall   # = 3.6mm
+    t_wall  = min_wall
+    h       = 67.0             # proven in v21b (62mm gave mesh divergence)
+    arm_len = max(lx - 12.0, 1.0)
+    arm_len = min(arm_len, bv[0] - 2.0)
+    yc      = ly
+
+    outer = BRepPrimAPI_MakeBox(
+        gp_Pnt(0.0,     yc - fw / 2,            0.0),
+        gp_Pnt(arm_len, yc + fw / 2,            h),
+    ).Shape()
+    # Inner ends at arm_len-t_wall: solid tip cap avoids open-edge stress concentration
+    inner = BRepPrimAPI_MakeBox(
+        gp_Pnt(plate_t,          yc - fw / 2 + t_wall, t_wall),
+        gp_Pnt(arm_len - t_wall, yc + fw / 2 - t_wall, h - t_wall),
+    ).Shape()
+    arm = BRepAlgoAPI_Cut(outer, inner).Shape()
+
+    body = BRepAlgoAPI_Fuse(arm, plate).Shape()
+
+    for by, bz in bolts:
+        ax   = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1, 0, 0))
+        hole = BRepPrimAPI_MakeCylinder(ax, bolt_r, plate_t + 2.0).Shape()
+        body = BRepAlgoAPI_Cut(body, hole).Shape()
+
+    writer = STEPControl_Writer()
+    Interface_Static.SetCVal_s("write.step.schema", "AP214IS")
+    writer.Transfer(body, STEPControl_AsIs)
+
+    with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f:
+        path = f.name
+    try:
+        writer.Write(path)
+        with open(path, "rb") as f:
+            return f.read()
+    finally:
+        os.unlink(path)

From 822bbcb959b0927b60bd11004ab7e611e9c73294 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 16:52:16 +0000
Subject: [PATCH 2/9] Add LLM agent contract: sdk, worker injection, example
 agent

---
 .github/workflows/eval.yml  |  4 +-
 CONTRIBUTING.md             | 66 +++++++++++++++++++++++--
 agents/example-llm/agent.py | 96 +++++++++++++++++++++++++++++++++++++
 agents/example-llm/spec.txt |  1 +
 benchmark/_worker.py        | 13 ++++-
 forge/__init__.py           |  0
 forge/sdk/__init__.py       |  3 ++
 forge/sdk/llm.py            | 59 +++++++++++++++++++++++
 requirements.txt            |  1 +
 9 files changed, 238 insertions(+), 5 deletions(-)
 create mode 100644 agents/example-llm/agent.py
 create mode 100644 agents/example-llm/spec.txt
 create mode 100644 forge/__init__.py
 create mode 100644 forge/sdk/__init__.py
 create mode 100644 forge/sdk/llm.py

diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index 9bcb3da..f8b0179 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -102,10 +102,12 @@ jobs:
               STEP_FLAG="--step-out /forge/.forge_step_output.step"
             fi
             OUT=$(docker run --rm \
-              --network none \
               --security-opt no-new-privileges \
               --memory 4g \
               --cpus 2 \
+              -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY }} \
+              -e FORGE_MODEL=${{ secrets.FORGE_MODEL || 'anthropic/claude-haiku-4-5' }} \
+              -e FORGE_MODEL_WHITELIST=${{ vars.FORGE_MODEL_WHITELIST || 'anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini' }} \
               -v "${{ github.workspace }}:/forge" \
               forge-eval \
               --agent /forge/${{ steps.agent.outputs.path }} \
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5bf457e..e0e3f85 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -18,19 +18,79 @@ mkdir agents/<your-name>
 touch agents/<your-name>/agent.py
 ```
 
-Implement the `generate` function:
+Implement the `generate` function. There are two supported signatures:
 
+**Static agent** (no LLM — backward compatible):
 ```python
 def generate(spec: dict) -> bytes:
     """Return STEP file bytes for a part that satisfies spec."""
     ...
 ```
 
+**LLM agent** (recommended):
+```python
+from forge.sdk.llm import LLMClient
+
+def generate(spec: dict, llm: LLMClient) -> bytes:
+    """Return STEP file bytes, using the LLM to reason about geometry."""
+    ...
+```
+
+The harness inspects `generate` with `inspect.signature`; if it accepts two or more
+parameters, an `LLMClient` is passed as the second argument — you do not need to provide an API key.
+
+#### Using the LLM client
+
+`LLMClient` wraps the OpenRouter API:
+
+```python
+response: str = llm.chat(
+    messages=[{"role": "user", "content": "Your prompt here"}],
+    max_tokens=512,
+)
+```
+
+The model is chosen by the harness via `FORGE_MODEL`. During CI, only
+whitelisted models are accepted:
+
+- `anthropic/claude-haiku-4-5`
+- `anthropic/claude-3-5-haiku`
+- `openai/gpt-4o-mini`
+
+Miners do not configure the API key or model — the harness injects both.
+
+#### Observe → Plan → Act pattern
+
+```python
+from forge.sdk.llm import LLMClient
+import json
+
+def generate(spec: dict, llm: LLMClient) -> bytes:
+    # Observe: extract constraints
+    c = spec["constraints"]
+
+    # Plan: ask the LLM to reason about geometry parameters
+    raw = llm.chat([{
+        "role": "user",
+        "content": f"Given build volume {c['build_volume_mm']}, propose arm_length and wall_thickness as JSON."
+    }])
+    dims = json.loads(raw)
+
+    # Act: build the geometry with build123d
+    from build123d import Box, BuildPart
+    with BuildPart() as part:
+        Box(dims["arm_length"], dims["wall_thickness"], dims["wall_thickness"])
+
+    # ... export to STEP and return bytes
+```
+
+See `agents/example-llm/agent.py` for a complete working example.
+
 The agent runs inside a Docker container with these constraints:
 - **Time:** 60 seconds
 - **Memory:** 4 GB
-- **Network:** disabled
-- **Libraries available:** `build123d`, `gmsh`, `numpy`, `scipy`, `OCP`
+- **Network:** enabled (required for LLM API calls)
+- **Libraries available:** `build123d`, `gmsh`, `numpy`, `scipy`, `OCP`, `httpx`
 
 ### 3. Test locally
 
diff --git a/agents/example-llm/agent.py b/agents/example-llm/agent.py
new file mode 100644
index 0000000..d7d04ed
--- /dev/null
+++ b/agents/example-llm/agent.py
@@ -0,0 +1,131 @@
+"""
+Example LLM agent — observe → plan → act.
+
+The harness injects an LLMClient bound to a whitelisted model. This agent
+asks the LLM to propose dimensions for a simple L-bracket, then builds it
+with build123d and returns STEP bytes.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import tempfile
+
+from build123d import (
+    Box,
+    BuildPart,
+    Cylinder,
+    Location,
+    Locations,
+    Mode,
+    Pos,
+    export_step,
+)
+
+from forge.sdk.llm import LLMClient
+
+
+def _parse_json_reply(raw: str) -> dict:
+    """Decode the JSON object contained in an LLM reply.
+
+    Chat models often wrap JSON in a Markdown code fence or add a sentence of
+    prose even when told not to, so only the text from the first "{" to the
+    last "}" is handed to json.loads.
+
+    Raises:
+        ValueError: if the reply contains no "{...}" span, or that span is not
+            valid JSON (json.JSONDecodeError is a ValueError subclass).
+    """
+    start, end = raw.find("{"), raw.rfind("}")
+    if start == -1 or end < start:
+        raise ValueError(f"LLM reply contains no JSON object: {raw!r}")
+    return json.loads(raw[start:end + 1])
+
+
+def generate(spec: dict, llm: LLMClient) -> bytes:
+    """Return STEP bytes for an L-bracket whose dimensions the LLM proposes.
+
+    Args:
+        spec: problem spec; reads spec["constraints"] keys build_volume_mm,
+            load_point_mm, bolt_diameter_clearance_mm, min_wall_thickness_mm
+            and bolt_pattern_mm.
+        llm: harness-injected client; called once via llm.chat().
+
+    Raises:
+        ValueError: if the LLM reply does not contain a valid JSON object.
+        KeyError: if the reply lacks one of the requested dimension keys.
+    """
+    # ── Observe ──────────────────────────────────────────────────────────────
+    c = spec["constraints"]
+    bv = c["build_volume_mm"]          # [x, y, z] bounding box
+    load_pt = c["load_point_mm"]       # [x, y, z]
+    bolt_d = c["bolt_diameter_clearance_mm"]
+    min_wall = c["min_wall_thickness_mm"]
+
+    # ── Plan (LLM proposes dimensions) ────────────────────────────────────────
+    prompt = f"""You are a mechanical CAD assistant. Given this bracket spec, propose
+integer dimensions (mm) for a minimal L-bracket with a vertical mount plate and a
+horizontal arm. Reply with ONLY valid JSON, no prose.
+
+Spec:
+  build_volume_mm: {bv}
+  load_point_mm: {load_pt}
+  bolt_clearance_mm: {bolt_d}
+  min_wall_mm: {min_wall}
+
+Return JSON with exactly these keys:
+  arm_length   — horizontal arm length (x-axis), int
+  arm_thickness — arm wall thickness, int >= {max(4, int(min_wall) + 2)}
+  plate_height  — mount plate height (z-axis), int
+  plate_width   — mount plate width (y-axis), int
+  plate_thickness — mount plate thickness (x-axis), int >= {max(4, int(min_wall) + 2)}
+"""
+
+    raw = llm.chat(
+        [{"role": "user", "content": prompt}],
+        max_tokens=256,
+    )
+
+    dims = _parse_json_reply(raw)
+    arm_len   = int(dims["arm_length"])
+    arm_t     = int(dims["arm_thickness"])
+    plate_h   = int(dims["plate_height"])
+    plate_w   = int(dims["plate_width"])
+    plate_t   = int(dims["plate_thickness"])
+
+    # Clamp to build volume
+    arm_len = min(arm_len, int(bv[0]) - plate_t)
+    plate_h = min(plate_h, int(bv[2]))
+    plate_w = min(plate_w, int(bv[1]))
+
+    # ── Act (build geometry) ──────────────────────────────────────────────────
+    with BuildPart() as part:
+        # Vertical mount plate at x=0 face
+        with Locations(Pos(plate_t / 2, plate_w / 2, plate_h / 2)):
+            Box(plate_t, plate_w, plate_h)
+
+        # Horizontal arm extending along +x
+        arm_cx = plate_t + arm_len / 2
+        with Locations(Pos(arm_cx, plate_w / 2, arm_t / 2)):
+            Box(arm_len, plate_w, arm_t)
+
+        # Clear bolt holes through the mount plate. The cylinder is centred
+        # in the plate thickness and overshoots 1 mm on each side so the cut
+        # is a through-hole rather than a half-depth pocket.
+        bolt_r = bolt_d / 2
+        for by, bz in c["bolt_pattern_mm"]:
+            with Locations(Pos(plate_t / 2, by, bz)):
+                # Cylinder along x-axis
+                Cylinder(bolt_r, plate_t + 2.0, mode=Mode.SUBTRACT,
+                         rotation=(0, 90, 0))
+
+    # export_step is given a file path, so read the bytes back from a temp file.
+    with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f:
+        path = f.name
+    try:
+        export_step(part.part, path)
+        with open(path, "rb") as f:
+            return f.read()
+    finally:
+        os.unlink(path)
diff --git a/agents/example-llm/spec.txt b/agents/example-llm/spec.txt
new file mode 100644
index 0000000..30cda4b
--- /dev/null
+++ b/agents/example-llm/spec.txt
@@ -0,0 +1 @@
+pub_001_medium
\ No newline at end of file
diff --git a/benchmark/_worker.py b/benchmark/_worker.py
index 22d8f95..d121200 100644
--- a/benchmark/_worker.py
+++ b/benchmark/_worker.py
@@ -15,12 +15,16 @@
 
 import argparse
 import importlib.util
+import inspect
 import json
 import os
 import resource
 import sys
 from pathlib import Path
 
+# Make forge.sdk importable regardless of install state.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
 CPU_SECONDS = 150
 
 
@@ -53,7 +57,14 @@ def main() -> None:
 
     try:
         loader_spec.loader.exec_module(mod)
-        step_bytes = mod.generate(spec)
+
+        sig = inspect.signature(mod.generate)
+        if len(sig.parameters) >= 2:
+            from forge.sdk.llm import LLMClient
+            llm = LLMClient()
+            step_bytes = mod.generate(spec, llm)
+        else:
+            step_bytes = mod.generate(spec)
     except Exception as exc:
         print(f"{type(exc).__name__}: {exc}", file=sys.stderr)
         sys.exit(1)
diff --git a/forge/__init__.py b/forge/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/forge/sdk/__init__.py b/forge/sdk/__init__.py
new file mode 100644
index 0000000..e60c25c
--- /dev/null
+++ b/forge/sdk/__init__.py
@@ -0,0 +1,3 @@
+from forge.sdk.llm import LLMClient
+
+__all__ = ["LLMClient"]
diff --git a/forge/sdk/llm.py b/forge/sdk/llm.py
new file mode 100644
index 0000000..55c36a5
--- /dev/null
+++ b/forge/sdk/llm.py
@@ -0,0 +1,94 @@
+"""
+LLM client for Forge agents.
+
+Reads configuration from environment:
+  FORGE_LLM_KEY          — OpenRouter API key (required at chat time)
+  FORGE_MODEL            — model ID to use (e.g. "anthropic/claude-haiku-4-5")
+  FORGE_MODEL_WHITELIST  — comma-separated allowed model IDs; omit to allow any
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import httpx
+
+OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+
+
+class LLMClient:
+    """Chat-completions client for OpenRouter, configured from the environment.
+
+    Construction only reads and validates configuration; no request is sent
+    until chat() is called.
+    """
+
+    def __init__(self) -> None:
+        """Read the FORGE_* settings and enforce the model whitelist.
+
+        Raises:
+            ValueError: if FORGE_MODEL_WHITELIST is set and FORGE_MODEL is not
+                one of its entries.
+        """
+        self._key = os.environ.get("FORGE_LLM_KEY", "").strip()
+        self._model = os.environ.get("FORGE_MODEL", "").strip()
+
+        raw_whitelist = os.environ.get("FORGE_MODEL_WHITELIST", "").strip()
+        # None means "no restriction"; an empty/blank variable counts as unset.
+        self._whitelist: list[str] | None = (
+            [m.strip() for m in raw_whitelist.split(",") if m.strip()]
+            if raw_whitelist
+            else None
+        )
+
+        if self._whitelist is not None and self._model not in self._whitelist:
+            raise ValueError(
+                f"Model {self._model!r} is not in the whitelist: {self._whitelist}"
+            )
+
+    @property
+    def model(self) -> str:
+        """Model ID taken from FORGE_MODEL (empty string if unset)."""
+        return self._model
+
+    def chat(self, messages: list[dict[str, Any]], max_tokens: int = 4096) -> str:
+        """Send one chat-completion request and return the reply text.
+
+        Args:
+            messages: chat messages, sent unchanged as the "messages" field.
+            max_tokens: sent unchanged as the "max_tokens" field.
+
+        Raises:
+            RuntimeError: if no API key is configured, or the response body
+                does not contain a text reply.
+            httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
+        """
+        if not self._key:
+            raise RuntimeError("No LLM key configured — set FORGE_LLM_KEY")
+
+        response = httpx.post(
+            OPENROUTER_URL,
+            headers={
+                "Authorization": f"Bearer {self._key}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": self._model,
+                "messages": messages,
+                "max_tokens": max_tokens,
+            },
+            timeout=60.0,
+        )
+        response.raise_for_status()
+
+        payload = response.json()
+        try:
+            content = payload["choices"][0]["message"]["content"]
+        except (KeyError, IndexError, TypeError) as exc:
+            # A 2xx body without choices (e.g. an error object) would
+            # otherwise surface as a bare KeyError with no context.
+            raise RuntimeError(f"Unexpected LLM response: {payload!r}") from exc
+        if not isinstance(content, str):
+            raise RuntimeError(f"LLM response has no text content: {payload!r}")
+        return content
diff --git a/requirements.txt b/requirements.txt
index 08ef611..7e2a32b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 build123d>=0.8.0
 gmsh>=4.12.0
+httpx>=0.27.0
 numpy>=1.26.0

From 67e9098a3a5726dd8ae849d3edb6471167cd2a1d Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 17:21:41 +0000
Subject: [PATCH 3/9] Move example-llm to examples/, fallback LLM key to
 OPENROUTER_KEY

---
 .github/workflows/eval.yml                          | 2 +-
 CONTRIBUTING.md                                     | 2 +-
 {agents/example-llm => examples/llm-agent}/agent.py | 0
 {agents/example-llm => examples/llm-agent}/spec.txt | 0
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename {agents/example-llm => examples/llm-agent}/agent.py (100%)
 rename {agents/example-llm => examples/llm-agent}/spec.txt (100%)

diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index f8b0179..66a67f6 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -105,7 +105,7 @@ jobs:
               --security-opt no-new-privileges \
               --memory 4g \
               --cpus 2 \
-              -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY }} \
+              -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY || secrets.OPENROUTER_KEY }} \
               -e FORGE_MODEL=${{ secrets.FORGE_MODEL || 'anthropic/claude-haiku-4-5' }} \
               -e FORGE_MODEL_WHITELIST=${{ vars.FORGE_MODEL_WHITELIST || 'anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini' }} \
               -v "${{ github.workspace }}:/forge" \
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e0e3f85..835e0d6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -84,7 +84,7 @@ def generate(spec: dict, llm: LLMClient) -> bytes:
     # ... export to STEP and return bytes
 ```
 
-See `agents/example-llm/agent.py` for a complete working example.
+See `examples/llm-agent/agent.py` for a complete working example.
 
 The agent runs inside a Docker container with these constraints:
 - **Time:** 60 seconds
diff --git a/agents/example-llm/agent.py b/examples/llm-agent/agent.py
similarity index 100%
rename from agents/example-llm/agent.py
rename to examples/llm-agent/agent.py
diff --git a/agents/example-llm/spec.txt b/examples/llm-agent/spec.txt
similarity index 100%
rename from agents/example-llm/spec.txt
rename to examples/llm-agent/spec.txt

From 886a1fe4c46830de92e8405f22b59a6481169420 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 17:28:37 +0000
Subject: [PATCH 4/9] Remove pub-bracket-v24 from platform PR

---
 agents/pub-bracket-v24/agent.py | 131 --------------------------------
 1 file changed, 131 deletions(-)
 delete mode 100644 agents/pub-bracket-v24/agent.py

diff --git a/agents/pub-bracket-v24/agent.py b/agents/pub-bracket-v24/agent.py
deleted file mode 100644
index cd69898..0000000
--- a/agents/pub-bracket-v24/agent.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""
-pub-bracket-v24: Minimum hollow arm + frame plate for pub_001_medium.
-
-Improvement over v21b: reduces fw from 8mm to 3×mw=3.6mm (minimum hollow
-cross-section) while keeping the proven frame plate and geometry from v21b.
-
-Structural analysis (pub_001: F=464.48N, fw=3.6mm, mw=1.2mm, h=67mm):
-  M = F × arm_len = 464.48 × 87.3 = 40,553 N·mm
-  I = (3.6×67³ − 1.2×64.6³) / 12 = 63,270 mm⁴
-  c = 33.5 mm
-  σ_analytical = 40,553 × 33.5 / 63,270 = 21.5 MPa
-  FEA/analytical ratio from v21b: 15.55/18.2 = 0.855
-  σ_FEA_estimate = 0.855 × 21.5 = 18.4 MPa → 73.6% of 25 MPa ✓
-
-Mesh clearance: inner ceiling = h − mw = 65.8mm, load_z = 38.6mm → 27.2mm >> 10mm ✓
-Arm tip cap: inner ends at arm_len−mw so tip is inherently solid (learned from v22).
-Frame plate: unchanged from v21b (2 interior voids for 3×2 bolt grid).
-
-Estimated mass ≈ 20.7 g (−7.8% from v21b at 22.44 g).
-"""
-
-from __future__ import annotations
-
-import os
-import tempfile
-
-
-def generate(spec: dict) -> bytes:
-    """Return STEP bytes for pub-bracket-v24 (fw=3.6mm arm + frame plate, pub_001)."""
-    from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse
-    from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder
-    from OCP.Interface import Interface_Static
-    from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer
-    from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt
-
-    c        = spec["constraints"]
-    bolts    = c["bolt_pattern_mm"]
-    bolt_d   = c["bolt_diameter_clearance_mm"]
-    lp       = c["load_point_mm"]
-    min_wall = c.get("min_wall_thickness_mm", 1.2)
-    bv       = c["build_volume_mm"]
-
-    lx, ly, lz = lp[0], lp[1], lp[2]
-
-    by_cols = sorted(set(p[0] for p in bolts))
-    bz_rows = sorted(set(p[1] for p in bolts))
-    bolt_r  = bolt_d / 2.0
-    strip_w = bolt_r + min_wall
-
-    # Plate margins — identical to v21b
-    bolt_y_span  = max(by_cols) - min(by_cols)
-    bolt_z_span  = max(bz_rows) - min(bz_rows)
-    bvy          = bv[1]
-    bvz          = bv[2]
-    y_half_avail = (bvy - bolt_y_span) / 2.0 - 0.5
-    z_half_avail = (bvz - bolt_z_span) / 2.0 - 0.5
-    margin_y     = min(strip_w, max(0.0, y_half_avail))
-    margin_z     = min(strip_w, max(0.0, z_half_avail))
-    MIN_SLIVER   = 0.5
-    if 0.0 < margin_y - bolt_r < MIN_SLIVER:
-        margin_y = bolt_r - MIN_SLIVER
-    if 0.0 < margin_z - bolt_r < MIN_SLIVER:
-        margin_z = bolt_r - MIN_SLIVER
-
-    plate_t  = min_wall
-    plate_y0 = min(by_cols) - margin_y
-    plate_y1 = max(by_cols) + margin_y
-    plate_z0 = min(bz_rows) - margin_z
-    plate_z1 = max(bz_rows) + margin_z
-
-    # Grid frame plate — same void-cut logic as v21b
-    plate = BRepPrimAPI_MakeBox(
-        gp_Pnt(0.0,     plate_y0, plate_z0),
-        gp_Pnt(plate_t, plate_y1, plate_z1),
-    ).Shape()
-
-    col_gap_starts = [y + strip_w for y in by_cols[:-1]]
-    col_gap_ends   = [y - strip_w for y in by_cols[1:]]
-    row_gap_starts = [z + strip_w for z in bz_rows[:-1]]
-    row_gap_ends   = [z - strip_w for z in bz_rows[1:]]
-
-    for y0, y1 in zip(col_gap_starts, col_gap_ends):
-        for z0, z1 in zip(row_gap_starts, row_gap_ends):
-            if y0 < y1 and z0 < z1:
-                void = BRepPrimAPI_MakeBox(
-                    gp_Pnt(0.0,     y0, z0),
-                    gp_Pnt(plate_t, y1, z1),
-                ).Shape()
-                cut  = BRepAlgoAPI_Cut(plate, void)
-                cut.Build()
-                plate = cut.Shape()
-
-    # Arm: fw = 3×mw = 3.6mm (minimum hollow cross-section)
-    # Inner cavity ends at arm_len−mw (solid tip cap, from v22 learning)
-    fw      = 3.0 * min_wall   # = 3.6mm
-    t_wall  = min_wall
-    h       = 67.0             # proven in v21b (62mm gave mesh divergence)
-    arm_len = max(lx - 12.0, 1.0)
-    arm_len = min(arm_len, bv[0] - 2.0)
-    yc      = ly
-
-    outer = BRepPrimAPI_MakeBox(
-        gp_Pnt(0.0,     yc - fw / 2,            0.0),
-        gp_Pnt(arm_len, yc + fw / 2,            h),
-    ).Shape()
-    # Inner ends at arm_len-t_wall: solid tip cap avoids open-edge stress concentration
-    inner = BRepPrimAPI_MakeBox(
-        gp_Pnt(plate_t,          yc - fw / 2 + t_wall, t_wall),
-        gp_Pnt(arm_len - t_wall, yc + fw / 2 - t_wall, h - t_wall),
-    ).Shape()
-    arm = BRepAlgoAPI_Cut(outer, inner).Shape()
-
-    body = BRepAlgoAPI_Fuse(arm, plate).Shape()
-
-    for by, bz in bolts:
-        ax   = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1, 0, 0))
-        hole = BRepPrimAPI_MakeCylinder(ax, bolt_r, plate_t + 2.0).Shape()
-        body = BRepAlgoAPI_Cut(body, hole).Shape()
-
-    writer = STEPControl_Writer()
-    Interface_Static.SetCVal_s("write.step.schema", "AP214IS")
-    writer.Transfer(body, STEPControl_AsIs)
-
-    with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f:
-        path = f.name
-    try:
-        writer.Write(path)
-        with open(path, "rb") as f:
-            return f.read()
-    finally:
-        os.unlink(path)

From ecae612de53eb261c84634c680dd245d5799ece6 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 17:29:40 +0000
Subject: [PATCH 5/9] Trigger CI: no agent eval, verify infra changes pass

---
 agents/.keep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 agents/.keep

diff --git a/agents/.keep b/agents/.keep
new file mode 100644
index 0000000..e69de29

From a02705ec415d2d9260235033b280f7aca917ca74 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 17:31:51 +0000
Subject: [PATCH 6/9] Pass LLM env defaults in forge eval for LLM agents

---
 cli.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/cli.py b/cli.py
index 6e99c06..5baeb1b 100644
--- a/cli.py
+++ b/cli.py
@@ -472,8 +472,15 @@ def _run_evaluate(agent_path: str, spec_path: str, verbose: bool) -> dict:
         "--spec", spec_path,
         "--json",
     ]
+    # Inherit environment; supply defaults so LLM agents work without extra setup.
+    env = os.environ.copy()
+    env.setdefault("FORGE_MODEL", "anthropic/claude-haiku-4-5")
+    env.setdefault(
+        "FORGE_MODEL_WHITELIST",
+        "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini",
+    )
     try:
-        proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT))
+        proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT), env=env)
     except FileNotFoundError:
         return {"passed": False, "stage": "error", "reason": "benchmark module not found — run from repo root"}
 

From 6092269a6c687b805e4075c88976dc2de1f4631d Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 18:19:52 +0000
Subject: [PATCH 7/9] Update docs: LLM agent interface, current SOTA, network
 note

---
 QUICKSTART.md            | 28 ++++++++++++++++--------
 README.md                | 37 +++++++++++++++++++++-----------
 agents/template/agent.py | 46 +++++++++++++++++++++++-----------------
 3 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/QUICKSTART.md b/QUICKSTART.md
index 652ea39..63f0d53 100644
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@@ -75,21 +75,31 @@ curl http://143.244.191.193:8000/specs/001_bracket
 cp -r agents/template agents/<your-name>
 ```
 
-Edit `agents/<your-name>/agent.py`. The only contract:
+Edit `agents/<your-name>/agent.py`. Two supported signatures:
 
+**Static agent** (no LLM):
 ```python
 def generate(spec: dict) -> bytes:
-    """
-    Takes the spec dict (load, bolt pattern, build volume, material).
-    Returns STEP file bytes for your design.
-    """
+    """Takes the spec dict, returns STEP file bytes."""
     ...
 ```
 
-See `agents/taper-beam/agent.py` for a clean I-beam reference implementation (~38g).
-See `agents/lean-arm/agent.py` for the I-beam baseline (~32g).
-See `agents/pocket-plate/agent.py` for the wall-pocketing approach (~30g).
-See `agents/compact-arm/agent.py` for the current SOTA (~27g).
+**LLM agent** (recommended — harness injects the client):
+```python
+from forge.sdk.llm import LLMClient
+
+def generate(spec: dict, llm: LLMClient) -> bytes:
+    """Use the LLM to reason about geometry, then return STEP bytes."""
+    response = llm.chat([{"role": "user", "content": "..."}])
+    ...
+```
+
+In CI no API key is needed — the harness injects `LLMClient` automatically using whitelisted models. For local runs, set `FORGE_LLM_KEY` to your OpenRouter API key. See `examples/llm-agent/agent.py` for a complete working example.
+
+Reference implementations in `agents/`:
+- `taper-beam/` — clean I-beam (~38g)
+- `lean-arm/` — I-beam baseline (~32g)
+- `compact-arm/` — pocketed arm approach
 
 ---
 
diff --git a/README.md b/README.md
index f38f8c1..a576458 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ forge eval agents/<your-name>/agent.py
 ## Submitting
 
 1. Fork this repo.
-2. Create `agents/<your-name>/agent.py` with a `generate(spec: dict) -> bytes` function.
+2. Create `agents/<your-name>/agent.py` with a `generate(spec, [llm]) -> bytes` function.
 3. Open a PR. CI scores your design automatically (~2 min) and posts:
    ```
    ## Forge Eval — NEW LEADER 🏆
@@ -77,18 +77,30 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines.
 
 ## Agent interface
 
+Two supported signatures — the harness detects which one you use automatically:
+
+**Static agent** (no LLM):
 ```python
 def generate(spec: dict) -> bytes:
-    """
-    Takes the spec dict (load, bolt pattern, build volume, material).
-    Returns STEP file bytes for your design.
+    """Build and return STEP file bytes for the given spec."""
+    ...
+```
 
-    Sandbox: 60s timeout, 4GB RAM, no network access.
-    """
+**LLM agent** (recommended):
+```python
+from forge.sdk.llm import LLMClient
+
+def generate(spec: dict, llm: LLMClient) -> bytes:
+    """Use the LLM to reason about geometry, then return STEP bytes."""
+    response = llm.chat([{"role": "user", "content": "..."}])
     ...
 ```
 
-Libraries available in eval: `build123d`, `OCP`, `gmsh`, `numpy`, `scipy`. See agents/ for reference implementations.
+The harness injects `LLMClient` automatically — no API key is required in CI. Whitelisted models: `anthropic/claude-haiku-4-5`, `anthropic/claude-3-5-haiku`, `openai/gpt-4o-mini`. See `examples/llm-agent/` for a complete working example.
+
+Sandbox constraints: **60s timeout · 4 GB RAM · network enabled (needed for LLM API calls)**
+
+Libraries available: `build123d`, `OCP`, `gmsh`, `numpy`, `scipy`, `httpx`. See `agents/` for reference implementations.
 
 ---
 
@@ -135,11 +147,12 @@ All CPU. No GPU required.
 
 Live: http://143.244.191.193:8000/sota
 
-| Spec | Score | Agent | FEA Stress |
-|---|---|---|---|
-| 001 Wall Bracket | **27.22 g** | compact-arm | 13.8 / 25.0 MPa |
-| 002 Equipment Mount | — | — | — |
-| 003 Pipe-Clamp | 2799.52 g | baseline_steel | 22.18 / 82.0 MPa |
+| Spec | Score | Agent |
+|---|---|---|
+| spec-001 Wall Bracket | **23.48 g** | sub-nano |
+| spec-002 Equipment Mount | **25.84 g** | al-bracket-v19 |
+| spec-003 Pipe-Clamp | **71.42 g** | ss-bracket-v15 |
+| pub_001 – pub_005 | see leaderboard | various |
 
 ---
 
diff --git a/agents/template/agent.py b/agents/template/agent.py
index 0bb7c38..d7abfdb 100644
--- a/agents/template/agent.py
+++ b/agents/template/agent.py
@@ -1,17 +1,23 @@
 """
 Template agent — start here.
 
-Contract: implement generate(spec) -> bytes (STEP file).
+Two supported signatures:
 
-The eval harness calls generate() with the spec dict for each problem.
-Return valid STEP bytes. The harness handles geometry checks and FEA;
-your job is to return the lightest part that passes all constraints.
+    generate(spec: dict) -> bytes              # static agent
+    generate(spec: dict, llm: LLMClient) -> bytes  # LLM agent (recommended)
 
-See QUICKSTART.md for a full walkthrough.
+The harness detects which one you use via inspect.signature and injects an
+LLMClient automatically when your signature accepts one — no API key required.
+
+See QUICKSTART.md for a full walkthrough and examples/llm-agent/ for an
+LLM agent example.
 """
 
 from __future__ import annotations
 
+# To use the LLM client, uncomment:
+# from forge.sdk.llm import LLMClient
+
 # TODO: import your geometry library
 # from build123d import ...          # recommended
 # from OCP.BRepPrimAPI import ...    # raw OCP (see agents/baseline/)
@@ -21,27 +27,27 @@ def generate(spec: dict) -> bytes:
     """
     Build and return a STEP file for the given spec.
 
+    To use an LLM, change the signature to: generate(spec, llm: LLMClient)
+
     Args:
-        spec: Problem specification dict. Key structure:
-            spec["constraints"]["load_n"]               — applied load in Newtons
-            spec["constraints"]["load_point_mm"]        — [x, y, z] load application point
-            spec["constraints"]["build_volume_mm"]      — [x, y, z] max bounding box
-            spec["constraints"]["bolt_pattern_mm"]      — [[y, z], ...] bolt hole centers (x=0 plane)
-            spec["constraints"]["bolt_diameter_clearance_mm"] — minimum clearance diameter
-            spec["constraints"]["min_wall_thickness_mm"] — minimum feature wall
-            spec["constraints"]["max_overhang_deg"]     — max overhang from vertical
-            spec["material"]                            — material name (see benchmark/materials.py)
+        spec: Problem specification dict. Key fields:
+            spec["constraints"]["load_n"]               — load in Newtons
+            spec["constraints"]["load_point_mm"]        — [x, y, z] load point
+            spec["constraints"]["build_volume_mm"]      — [x, y, z] bounding box
+            spec["constraints"]["bolt_pattern_mm"]      — [[y, z], ...] bolt centers
+            spec["constraints"]["bolt_diameter_clearance_mm"] — hole clearance
+            spec["constraints"]["min_wall_thickness_mm"] — minimum wall
+            spec["constraints"]["max_overhang_deg"]     — max printable overhang
+            spec["material"]                            — material name
             spec["safety_factor"]                       — FEA stress safety factor
+            spec["scoring"]["metric"]                   — "mass_grams" | "volume_mm3" | ...
 
     Returns:
-        STEP file as raw bytes. Must be valid AP214IS STEP.
+        STEP file as raw bytes (AP214IS schema required).
 
     Notes:
-        - Must be deterministic: same spec → same bytes every call.
-          If you use any randomness, fix the seed (e.g. random.seed(42)).
-        - The FEA mesh uses C3D4 linear tets at ~2 mm characteristic length.
-          Avoid features thinner than 3 mm — they produce degenerate elements.
-        - Lower mass = better score. There is no ceiling; keep optimizing.
+        - Must be deterministic: same spec → same bytes. Fix any random seeds.
+        - Avoid features thinner than 3 mm — they produce degenerate FEA elements.
     """
 
     constraints = spec["constraints"]

From 132a9e362f4b0942caf2a7e5df45f161c6daa588 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 18:32:57 +0000
Subject: [PATCH 8/9] Skip template agent in eval CI

---
 .github/workflows/eval.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index 66a67f6..8d9c9a8 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -21,7 +21,7 @@ jobs:
         id: agent
         run: |
           AGENT=$(git diff --name-only origin/${{ github.base_ref }}...HEAD \
-            | grep '^agents/.*/agent\.py$' | head -1)
+            | grep '^agents/.*/agent\.py$' | grep -v '^agents/template/' | head -1)
           if [ -z "$AGENT" ]; then
             echo "No agent.py changed — skipping eval."
             echo "found=false" >> "$GITHUB_OUTPUT"

From c946e730491575f578ba0d907072b674b2ccc584 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Tue, 2 Jun 2026 18:35:01 +0000
Subject: [PATCH 9/9] Update PR template for LLM agents and forge CLI

---
 .github/PULL_REQUEST_TEMPLATE.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 283cd0e..88f42ee 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -4,7 +4,7 @@
 
 ## Spec
 
-<!-- Which spec does this target? (e.g., 001_bracket) -->
+<!-- Which spec does this target? (e.g., pub_001_medium) -->
 
 ## Score
 
@@ -12,11 +12,12 @@
 
 ## Approach
 
-<!-- Brief technical description: topology optimization? parametric? lattice? -->
+<!-- Brief technical description: topology optimization? parametric? LLM-guided? lattice? -->
 
 ## Checklist
 
-- [ ] `agents/<my-name>/agent.py` implements `generate(spec) -> bytes`
-- [ ] Local eval passes: `docker run ... --agent agents/<my-name>/agent.py --spec specs/001_bracket.json`
-- [ ] No external network calls in `generate()`
-- [ ] Agent is deterministic (same output for same spec)
+- [ ] `agents/<my-name>/agent.py` implements `generate(spec) -> bytes` or `generate(spec, llm) -> bytes`
+- [ ] `agents/<my-name>/spec.txt` contains the target spec ID (e.g., `pub_001_medium`)
+- [ ] Local eval passes: `forge eval agents/<my-name>/agent.py`
+- [ ] Agent is deterministic (same spec → same bytes; fix any random seeds)
+- [ ] LLM agents: uses the harness-injected `LLMClient`, not a hardcoded API key