From 8ef6b3ac9fc954513fd9e14af9528dc3f85b2f93 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 16:40:29 +0000 Subject: [PATCH 1/9] Add pub-bracket-v24: minimum hollow arm fw=3.6mm for pub_001, target ~20.7g MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v21b runs at only 62% stress utilisation (15.55 MPa of 25 MPa) with fw=8mm. Reduce arm width to minimum hollow (fw=3×mw=3.6mm). Analytical σ=21.5 MPa, FEA estimate 18.4 MPa (0.855× ratio from v21b). Frame plate unchanged. Also applies v22's tip-cap fix: inner ends at arm_len-mw to prevent open-edge stress concentration. Co-Authored-By: Claude Sonnet 4.6 --- agents/pub-bracket-v24/agent.py | 131 ++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 agents/pub-bracket-v24/agent.py diff --git a/agents/pub-bracket-v24/agent.py b/agents/pub-bracket-v24/agent.py new file mode 100644 index 0000000..cd69898 --- /dev/null +++ b/agents/pub-bracket-v24/agent.py @@ -0,0 +1,131 @@ +""" +pub-bracket-v24: Minimum hollow arm + frame plate for pub_001_medium. + +Improvement over v21b: reduces fw from 8mm to 3×mw=3.6mm (minimum hollow +cross-section) while keeping the proven frame plate and geometry from v21b. + +Structural analysis (pub_001: F=464.48N, fw=3.6mm, mw=1.2mm, h=67mm): + M = F × arm_len = 464.48 × 87.3 = 40,553 N·mm + I = (3.6×67³ − 1.2×64.6³) / 12 = 63,270 mm⁴ + c = 33.5 mm + σ_analytical = 40,553 × 33.5 / 63,270 = 21.5 MPa + FEA/analytical ratio from v21b: 15.55/18.2 = 0.855 + σ_FEA_estimate = 0.855 × 21.5 = 18.4 MPa → 73.6% of 25 MPa ✓ + +Mesh clearance: inner ceiling = h − mw = 65.8mm, load_z = 38.6mm → 27.2mm >> 10mm ✓ +Arm tip cap: inner ends at arm_len−mw so tip is inherently solid (learned from v22). +Frame plate: unchanged from v21b (2 interior voids for 3×2 bolt grid). + +Estimated mass ≈ 20.7 g (−7.8% from v21b at 22.44 g). 
+""" + +from __future__ import annotations + +import os +import tempfile + + +def generate(spec: dict) -> bytes: + """Return STEP bytes for pub-bracket-v24 (fw=3.6mm arm + frame plate, pub_001).""" + from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse + from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder + from OCP.Interface import Interface_Static + from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer + from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt + + c = spec["constraints"] + bolts = c["bolt_pattern_mm"] + bolt_d = c["bolt_diameter_clearance_mm"] + lp = c["load_point_mm"] + min_wall = c.get("min_wall_thickness_mm", 1.2) + bv = c["build_volume_mm"] + + lx, ly, lz = lp[0], lp[1], lp[2] + + by_cols = sorted(set(p[0] for p in bolts)) + bz_rows = sorted(set(p[1] for p in bolts)) + bolt_r = bolt_d / 2.0 + strip_w = bolt_r + min_wall + + # Plate margins — identical to v21b + bolt_y_span = max(by_cols) - min(by_cols) + bolt_z_span = max(bz_rows) - min(bz_rows) + bvy = bv[1] + bvz = bv[2] + y_half_avail = (bvy - bolt_y_span) / 2.0 - 0.5 + z_half_avail = (bvz - bolt_z_span) / 2.0 - 0.5 + margin_y = min(strip_w, max(0.0, y_half_avail)) + margin_z = min(strip_w, max(0.0, z_half_avail)) + MIN_SLIVER = 0.5 + if 0.0 < margin_y - bolt_r < MIN_SLIVER: + margin_y = bolt_r - MIN_SLIVER + if 0.0 < margin_z - bolt_r < MIN_SLIVER: + margin_z = bolt_r - MIN_SLIVER + + plate_t = min_wall + plate_y0 = min(by_cols) - margin_y + plate_y1 = max(by_cols) + margin_y + plate_z0 = min(bz_rows) - margin_z + plate_z1 = max(bz_rows) + margin_z + + # Grid frame plate — same void-cut logic as v21b + plate = BRepPrimAPI_MakeBox( + gp_Pnt(0.0, plate_y0, plate_z0), + gp_Pnt(plate_t, plate_y1, plate_z1), + ).Shape() + + col_gap_starts = [y + strip_w for y in by_cols[:-1]] + col_gap_ends = [y - strip_w for y in by_cols[1:]] + row_gap_starts = [z + strip_w for z in bz_rows[:-1]] + row_gap_ends = [z - strip_w for z in bz_rows[1:]] + + for y0, y1 in 
zip(col_gap_starts, col_gap_ends): + for z0, z1 in zip(row_gap_starts, row_gap_ends): + if y0 < y1 and z0 < z1: + void = BRepPrimAPI_MakeBox( + gp_Pnt(0.0, y0, z0), + gp_Pnt(plate_t, y1, z1), + ).Shape() + cut = BRepAlgoAPI_Cut(plate, void) + cut.Build() + plate = cut.Shape() + + # Arm: fw = 3×mw = 3.6mm (minimum hollow cross-section) + # Inner cavity ends at arm_len−mw (solid tip cap, from v22 learning) + fw = 3.0 * min_wall # = 3.6mm + t_wall = min_wall + h = 67.0 # proven in v21b (62mm gave mesh divergence) + arm_len = max(lx - 12.0, 1.0) + arm_len = min(arm_len, bv[0] - 2.0) + yc = ly + + outer = BRepPrimAPI_MakeBox( + gp_Pnt(0.0, yc - fw / 2, 0.0), + gp_Pnt(arm_len, yc + fw / 2, h), + ).Shape() + # Inner ends at arm_len-t_wall: solid tip cap avoids open-edge stress concentration + inner = BRepPrimAPI_MakeBox( + gp_Pnt(plate_t, yc - fw / 2 + t_wall, t_wall), + gp_Pnt(arm_len - t_wall, yc + fw / 2 - t_wall, h - t_wall), + ).Shape() + arm = BRepAlgoAPI_Cut(outer, inner).Shape() + + body = BRepAlgoAPI_Fuse(arm, plate).Shape() + + for by, bz in bolts: + ax = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1, 0, 0)) + hole = BRepPrimAPI_MakeCylinder(ax, bolt_r, plate_t + 2.0).Shape() + body = BRepAlgoAPI_Cut(body, hole).Shape() + + writer = STEPControl_Writer() + Interface_Static.SetCVal_s("write.step.schema", "AP214IS") + writer.Transfer(body, STEPControl_AsIs) + + with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f: + path = f.name + try: + writer.Write(path) + with open(path, "rb") as f: + return f.read() + finally: + os.unlink(path) From 822bbcb959b0927b60bd11004ab7e611e9c73294 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 16:52:16 +0000 Subject: [PATCH 2/9] Add LLM agent contract: sdk, worker injection, example agent --- .github/workflows/eval.yml | 4 +- CONTRIBUTING.md | 66 +++++++++++++++++++++++-- agents/example-llm/agent.py | 96 +++++++++++++++++++++++++++++++++++++ agents/example-llm/spec.txt | 1 + benchmark/_worker.py | 13 ++++- 
forge/__init__.py | 0 forge/sdk/__init__.py | 3 ++ forge/sdk/llm.py | 59 +++++++++++++++++++++++ requirements.txt | 1 + 9 files changed, 238 insertions(+), 5 deletions(-) create mode 100644 agents/example-llm/agent.py create mode 100644 agents/example-llm/spec.txt create mode 100644 forge/__init__.py create mode 100644 forge/sdk/__init__.py create mode 100644 forge/sdk/llm.py diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index 9bcb3da..f8b0179 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -102,10 +102,12 @@ jobs: STEP_FLAG="--step-out /forge/.forge_step_output.step" fi OUT=$(docker run --rm \ - --network none \ --security-opt no-new-privileges \ --memory 4g \ --cpus 2 \ + -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY }} \ + -e FORGE_MODEL=${{ secrets.FORGE_MODEL || 'anthropic/claude-haiku-4-5' }} \ + -e FORGE_MODEL_WHITELIST=${{ vars.FORGE_MODEL_WHITELIST || 'anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini' }} \ -v "${{ github.workspace }}:/forge" \ forge-eval \ --agent /forge/${{ steps.agent.outputs.path }} \ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5bf457e..e0e3f85 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,19 +18,79 @@ mkdir agents/ touch agents//agent.py ``` -Implement the `generate` function: +Implement the `generate` function. There are two supported signatures: +**Static agent** (no LLM — backward compatible): ```python def generate(spec: dict) -> bytes: """Return STEP file bytes for a part that satisfies spec.""" ... ``` +**LLM agent** (recommended): +```python +from forge.sdk.llm import LLMClient + +def generate(spec: dict, llm: LLMClient) -> bytes: + """Return STEP file bytes, using the LLM to reason about geometry.""" + ... +``` + +The harness detects which signature you use via `inspect.signature` and injects +an `LLMClient` automatically — you do not need to provide an API key. 
+ +#### Using the LLM client + +`LLMClient` wraps the OpenRouter API: + +```python +response: str = llm.chat( + messages=[{"role": "user", "content": "Your prompt here"}], + max_tokens=512, +) +``` + +The model is chosen by the harness via `FORGE_MODEL`. During CI, only +whitelisted models are accepted: + +- `anthropic/claude-haiku-4-5` +- `anthropic/claude-3-5-haiku` +- `openai/gpt-4o-mini` + +Miners do not configure the API key or model — the harness injects both. + +#### Observe → Plan → Act pattern + +```python +from forge.sdk.llm import LLMClient +import json + +def generate(spec: dict, llm: LLMClient) -> bytes: + # Observe: extract constraints + c = spec["constraints"] + + # Plan: ask the LLM to reason about geometry parameters + raw = llm.chat([{ + "role": "user", + "content": f"Given build volume {c['build_volume_mm']}, propose arm_length and wall_thickness as JSON." + }]) + dims = json.loads(raw) + + # Act: build the geometry with build123d + from build123d import Box, BuildPart + with BuildPart() as part: + Box(dims["arm_length"], dims["wall_thickness"], dims["wall_thickness"]) + + # ... export to STEP and return bytes +``` + +See `agents/example-llm/agent.py` for a complete working example. + The agent runs inside a Docker container with these constraints: - **Time:** 60 seconds - **Memory:** 4 GB -- **Network:** disabled -- **Libraries available:** `build123d`, `gmsh`, `numpy`, `scipy`, `OCP` +- **Network:** enabled (required for LLM API calls) +- **Libraries available:** `build123d`, `gmsh`, `numpy`, `scipy`, `OCP`, `httpx` ### 3. Test locally diff --git a/agents/example-llm/agent.py b/agents/example-llm/agent.py new file mode 100644 index 0000000..d7d04ed --- /dev/null +++ b/agents/example-llm/agent.py @@ -0,0 +1,96 @@ +""" +Example LLM agent — observe → plan → act. + +The harness injects an LLMClient bound to a whitelisted model. 
This agent +asks the LLM to propose dimensions for a simple L-bracket, then builds it +with build123d and returns STEP bytes. +""" + +from __future__ import annotations + +import json + +from build123d import ( + Box, + BuildPart, + Cylinder, + Location, + Mode, + Pos, + export_step, +) + +from forge.sdk.llm import LLMClient + + +def generate(spec: dict, llm: LLMClient) -> bytes: + # ── Observe ────────────────────────────────────────────────────────────── + c = spec["constraints"] + bv = c["build_volume_mm"] # [x, y, z] bounding box + load_pt = c["load_point_mm"] # [x, y, z] + bolt_d = c["bolt_diameter_clearance_mm"] + min_wall = c["min_wall_thickness_mm"] + + # ── Plan (LLM proposes dimensions) ──────────────────────────────────────── + prompt = f"""You are a mechanical CAD assistant. Given this bracket spec, propose +integer dimensions (mm) for a minimal L-bracket with a vertical mount plate and a +horizontal arm. Reply with ONLY valid JSON, no prose. + +Spec: + build_volume_mm: {bv} + load_point_mm: {load_pt} + bolt_clearance_mm: {bolt_d} + min_wall_mm: {min_wall} + +Return JSON with exactly these keys: + arm_length — horizontal arm length (x-axis), int + arm_thickness — arm wall thickness, int >= {max(4, int(min_wall) + 2)} + plate_height — mount plate height (z-axis), int + plate_width — mount plate width (y-axis), int + plate_thickness — mount plate thickness (x-axis), int >= {max(4, int(min_wall) + 2)} +""" + + raw = llm.chat( + [{"role": "user", "content": prompt}], + max_tokens=256, + ) + + dims = json.loads(raw) + arm_len = int(dims["arm_length"]) + arm_t = int(dims["arm_thickness"]) + plate_h = int(dims["plate_height"]) + plate_w = int(dims["plate_width"]) + plate_t = int(dims["plate_thickness"]) + + # Clamp to build volume + arm_len = min(arm_len, int(bv[0]) - plate_t) + plate_h = min(plate_h, int(bv[2])) + plate_w = min(plate_w, int(bv[1])) + + # ── Act (build geometry) ────────────────────────────────────────────────── + with BuildPart() as part: + # 
Vertical mount plate at x=0 face + with Pos(plate_t / 2, plate_w / 2, plate_h / 2): + Box(plate_t, plate_w, plate_h) + + # Horizontal arm extending along +x + arm_cx = plate_t + arm_len / 2 + with Pos(arm_cx, plate_w / 2, arm_t / 2): + Box(arm_len, plate_w, arm_t) + + # Clear bolt holes through the mount plate + bolt_r = bolt_d / 2 + for (by, bz) in c["bolt_pattern_mm"]: + with Pos(0, by + plate_w / 2 - plate_w / 2, bz): + # Cylinder along x-axis + Cylinder(bolt_r, plate_t, mode=Mode.SUBTRACT, + rotation=(0, 90, 0)) + + import tempfile, os + with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f: + path = f.name + try: + export_step(part.part, path) + return open(path, "rb").read() + finally: + os.unlink(path) diff --git a/agents/example-llm/spec.txt b/agents/example-llm/spec.txt new file mode 100644 index 0000000..30cda4b --- /dev/null +++ b/agents/example-llm/spec.txt @@ -0,0 +1 @@ +pub_001_medium \ No newline at end of file diff --git a/benchmark/_worker.py b/benchmark/_worker.py index 22d8f95..d121200 100644 --- a/benchmark/_worker.py +++ b/benchmark/_worker.py @@ -15,12 +15,16 @@ import argparse import importlib.util +import inspect import json import os import resource import sys from pathlib import Path +# Make forge.sdk importable regardless of install state. 
+sys.path.insert(0, str(Path(__file__).parent.parent)) + CPU_SECONDS = 150 @@ -53,7 +57,14 @@ def main() -> None: try: loader_spec.loader.exec_module(mod) - step_bytes = mod.generate(spec) + + sig = inspect.signature(mod.generate) + if len(sig.parameters) >= 2: + from forge.sdk.llm import LLMClient + llm = LLMClient() + step_bytes = mod.generate(spec, llm) + else: + step_bytes = mod.generate(spec) except Exception as exc: print(f"{type(exc).__name__}: {exc}", file=sys.stderr) sys.exit(1) diff --git a/forge/__init__.py b/forge/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/forge/sdk/__init__.py b/forge/sdk/__init__.py new file mode 100644 index 0000000..e60c25c --- /dev/null +++ b/forge/sdk/__init__.py @@ -0,0 +1,3 @@ +from forge.sdk.llm import LLMClient + +__all__ = ["LLMClient"] diff --git a/forge/sdk/llm.py b/forge/sdk/llm.py new file mode 100644 index 0000000..55c36a5 --- /dev/null +++ b/forge/sdk/llm.py @@ -0,0 +1,59 @@ +""" +LLM client for Forge agents. + +Reads configuration from environment: + FORGE_LLM_KEY — OpenRouter API key (required at chat time) + FORGE_MODEL — model ID to use (e.g. 
"anthropic/claude-haiku-4-5") + FORGE_MODEL_WHITELIST — comma-separated allowed model IDs; omit to allow any +""" + +from __future__ import annotations + +import os +from typing import Any + +import httpx + +OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" + + +class LLMClient: + def __init__(self) -> None: + self._key = os.environ.get("FORGE_LLM_KEY", "").strip() + self._model = os.environ.get("FORGE_MODEL", "").strip() + + raw_whitelist = os.environ.get("FORGE_MODEL_WHITELIST", "").strip() + self._whitelist: list[str] | None = ( + [m.strip() for m in raw_whitelist.split(",") if m.strip()] + if raw_whitelist + else None + ) + + if self._whitelist is not None and self._model not in self._whitelist: + raise ValueError( + f"Model {self._model!r} is not in the whitelist: {self._whitelist}" + ) + + @property + def model(self) -> str: + return self._model + + def chat(self, messages: list[dict[str, Any]], max_tokens: int = 4096) -> str: + if not self._key: + raise RuntimeError("No LLM key configured — set FORGE_LLM_KEY") + + response = httpx.post( + OPENROUTER_URL, + headers={ + "Authorization": f"Bearer {self._key}", + "Content-Type": "application/json", + }, + json={ + "model": self._model, + "messages": messages, + "max_tokens": max_tokens, + }, + timeout=60.0, + ) + response.raise_for_status() + return response.json()["choices"][0]["message"]["content"] diff --git a/requirements.txt b/requirements.txt index 08ef611..7e2a32b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ build123d>=0.8.0 gmsh>=4.12.0 +httpx>=0.27.0 numpy>=1.26.0 From 67e9098a3a5726dd8ae849d3edb6471167cd2a1d Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 17:21:41 +0000 Subject: [PATCH 3/9] Move example-llm to examples/, fallback LLM key to OPENROUTER_KEY --- .github/workflows/eval.yml | 2 +- CONTRIBUTING.md | 2 +- {agents/example-llm => examples/llm-agent}/agent.py | 0 {agents/example-llm => examples/llm-agent}/spec.txt | 0 4 files changed, 2 
insertions(+), 2 deletions(-) rename {agents/example-llm => examples/llm-agent}/agent.py (100%) rename {agents/example-llm => examples/llm-agent}/spec.txt (100%) diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index f8b0179..66a67f6 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -105,7 +105,7 @@ jobs: --security-opt no-new-privileges \ --memory 4g \ --cpus 2 \ - -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY }} \ + -e FORGE_LLM_KEY=${{ secrets.FORGE_LLM_KEY || secrets.OPENROUTER_KEY }} \ -e FORGE_MODEL=${{ secrets.FORGE_MODEL || 'anthropic/claude-haiku-4-5' }} \ -e FORGE_MODEL_WHITELIST=${{ vars.FORGE_MODEL_WHITELIST || 'anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini' }} \ -v "${{ github.workspace }}:/forge" \ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e0e3f85..835e0d6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,7 +84,7 @@ def generate(spec: dict, llm: LLMClient) -> bytes: # ... export to STEP and return bytes ``` -See `agents/example-llm/agent.py` for a complete working example. +See `examples/llm-agent/agent.py` for a complete working example. 
The agent runs inside a Docker container with these constraints: - **Time:** 60 seconds diff --git a/agents/example-llm/agent.py b/examples/llm-agent/agent.py similarity index 100% rename from agents/example-llm/agent.py rename to examples/llm-agent/agent.py diff --git a/agents/example-llm/spec.txt b/examples/llm-agent/spec.txt similarity index 100% rename from agents/example-llm/spec.txt rename to examples/llm-agent/spec.txt From 886a1fe4c46830de92e8405f22b59a6481169420 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 17:28:37 +0000 Subject: [PATCH 4/9] Remove pub-bracket-v24 from platform PR --- agents/pub-bracket-v24/agent.py | 131 -------------------------------- 1 file changed, 131 deletions(-) delete mode 100644 agents/pub-bracket-v24/agent.py diff --git a/agents/pub-bracket-v24/agent.py b/agents/pub-bracket-v24/agent.py deleted file mode 100644 index cd69898..0000000 --- a/agents/pub-bracket-v24/agent.py +++ /dev/null @@ -1,131 +0,0 @@ -""" -pub-bracket-v24: Minimum hollow arm + frame plate for pub_001_medium. - -Improvement over v21b: reduces fw from 8mm to 3×mw=3.6mm (minimum hollow -cross-section) while keeping the proven frame plate and geometry from v21b. - -Structural analysis (pub_001: F=464.48N, fw=3.6mm, mw=1.2mm, h=67mm): - M = F × arm_len = 464.48 × 87.3 = 40,553 N·mm - I = (3.6×67³ − 1.2×64.6³) / 12 = 63,270 mm⁴ - c = 33.5 mm - σ_analytical = 40,553 × 33.5 / 63,270 = 21.5 MPa - FEA/analytical ratio from v21b: 15.55/18.2 = 0.855 - σ_FEA_estimate = 0.855 × 21.5 = 18.4 MPa → 73.6% of 25 MPa ✓ - -Mesh clearance: inner ceiling = h − mw = 65.8mm, load_z = 38.6mm → 27.2mm >> 10mm ✓ -Arm tip cap: inner ends at arm_len−mw so tip is inherently solid (learned from v22). -Frame plate: unchanged from v21b (2 interior voids for 3×2 bolt grid). - -Estimated mass ≈ 20.7 g (−7.8% from v21b at 22.44 g). 
-""" - -from __future__ import annotations - -import os -import tempfile - - -def generate(spec: dict) -> bytes: - """Return STEP bytes for pub-bracket-v24 (fw=3.6mm arm + frame plate, pub_001).""" - from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse - from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder - from OCP.Interface import Interface_Static - from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer - from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt - - c = spec["constraints"] - bolts = c["bolt_pattern_mm"] - bolt_d = c["bolt_diameter_clearance_mm"] - lp = c["load_point_mm"] - min_wall = c.get("min_wall_thickness_mm", 1.2) - bv = c["build_volume_mm"] - - lx, ly, lz = lp[0], lp[1], lp[2] - - by_cols = sorted(set(p[0] for p in bolts)) - bz_rows = sorted(set(p[1] for p in bolts)) - bolt_r = bolt_d / 2.0 - strip_w = bolt_r + min_wall - - # Plate margins — identical to v21b - bolt_y_span = max(by_cols) - min(by_cols) - bolt_z_span = max(bz_rows) - min(bz_rows) - bvy = bv[1] - bvz = bv[2] - y_half_avail = (bvy - bolt_y_span) / 2.0 - 0.5 - z_half_avail = (bvz - bolt_z_span) / 2.0 - 0.5 - margin_y = min(strip_w, max(0.0, y_half_avail)) - margin_z = min(strip_w, max(0.0, z_half_avail)) - MIN_SLIVER = 0.5 - if 0.0 < margin_y - bolt_r < MIN_SLIVER: - margin_y = bolt_r - MIN_SLIVER - if 0.0 < margin_z - bolt_r < MIN_SLIVER: - margin_z = bolt_r - MIN_SLIVER - - plate_t = min_wall - plate_y0 = min(by_cols) - margin_y - plate_y1 = max(by_cols) + margin_y - plate_z0 = min(bz_rows) - margin_z - plate_z1 = max(bz_rows) + margin_z - - # Grid frame plate — same void-cut logic as v21b - plate = BRepPrimAPI_MakeBox( - gp_Pnt(0.0, plate_y0, plate_z0), - gp_Pnt(plate_t, plate_y1, plate_z1), - ).Shape() - - col_gap_starts = [y + strip_w for y in by_cols[:-1]] - col_gap_ends = [y - strip_w for y in by_cols[1:]] - row_gap_starts = [z + strip_w for z in bz_rows[:-1]] - row_gap_ends = [z - strip_w for z in bz_rows[1:]] - - for y0, y1 in 
zip(col_gap_starts, col_gap_ends): - for z0, z1 in zip(row_gap_starts, row_gap_ends): - if y0 < y1 and z0 < z1: - void = BRepPrimAPI_MakeBox( - gp_Pnt(0.0, y0, z0), - gp_Pnt(plate_t, y1, z1), - ).Shape() - cut = BRepAlgoAPI_Cut(plate, void) - cut.Build() - plate = cut.Shape() - - # Arm: fw = 3×mw = 3.6mm (minimum hollow cross-section) - # Inner cavity ends at arm_len−mw (solid tip cap, from v22 learning) - fw = 3.0 * min_wall # = 3.6mm - t_wall = min_wall - h = 67.0 # proven in v21b (62mm gave mesh divergence) - arm_len = max(lx - 12.0, 1.0) - arm_len = min(arm_len, bv[0] - 2.0) - yc = ly - - outer = BRepPrimAPI_MakeBox( - gp_Pnt(0.0, yc - fw / 2, 0.0), - gp_Pnt(arm_len, yc + fw / 2, h), - ).Shape() - # Inner ends at arm_len-t_wall: solid tip cap avoids open-edge stress concentration - inner = BRepPrimAPI_MakeBox( - gp_Pnt(plate_t, yc - fw / 2 + t_wall, t_wall), - gp_Pnt(arm_len - t_wall, yc + fw / 2 - t_wall, h - t_wall), - ).Shape() - arm = BRepAlgoAPI_Cut(outer, inner).Shape() - - body = BRepAlgoAPI_Fuse(arm, plate).Shape() - - for by, bz in bolts: - ax = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1, 0, 0)) - hole = BRepPrimAPI_MakeCylinder(ax, bolt_r, plate_t + 2.0).Shape() - body = BRepAlgoAPI_Cut(body, hole).Shape() - - writer = STEPControl_Writer() - Interface_Static.SetCVal_s("write.step.schema", "AP214IS") - writer.Transfer(body, STEPControl_AsIs) - - with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f: - path = f.name - try: - writer.Write(path) - with open(path, "rb") as f: - return f.read() - finally: - os.unlink(path) From ecae612de53eb261c84634c680dd245d5799ece6 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 17:29:40 +0000 Subject: [PATCH 5/9] Trigger CI: no agent eval, verify infra changes pass --- agents/.keep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 agents/.keep diff --git a/agents/.keep b/agents/.keep new file mode 100644 index 0000000..e69de29 From a02705ec415d2d9260235033b280f7aca917ca74 Mon Sep 
17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 17:31:51 +0000 Subject: [PATCH 6/9] Pass LLM env defaults in forge eval for LLM agents --- cli.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cli.py b/cli.py index 6e99c06..5baeb1b 100644 --- a/cli.py +++ b/cli.py @@ -472,8 +472,15 @@ def _run_evaluate(agent_path: str, spec_path: str, verbose: bool) -> dict: "--spec", spec_path, "--json", ] + # Inherit environment; supply defaults so LLM agents work without extra setup. + env = os.environ.copy() + env.setdefault("FORGE_MODEL", "anthropic/claude-haiku-4-5") + env.setdefault( + "FORGE_MODEL_WHITELIST", + "anthropic/claude-haiku-4-5,anthropic/claude-3-5-haiku,openai/gpt-4o-mini", + ) try: - proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT)) + proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT), env=env) except FileNotFoundError: return {"passed": False, "stage": "error", "reason": "benchmark module not found — run from repo root"} From 6092269a6c687b805e4075c88976dc2de1f4631d Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 18:19:52 +0000 Subject: [PATCH 7/9] Update docs: LLM agent interface, current SOTA, network note --- QUICKSTART.md | 28 ++++++++++++++++-------- README.md | 37 +++++++++++++++++++++----------- agents/template/agent.py | 46 +++++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 41 deletions(-) diff --git a/QUICKSTART.md b/QUICKSTART.md index 652ea39..63f0d53 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -75,21 +75,31 @@ curl http://143.244.191.193:8000/specs/001_bracket cp -r agents/template agents/<your-agent> ``` -Edit `agents/<your-agent>/agent.py`. The only contract: +Edit `agents/<your-agent>/agent.py`. Two supported signatures: +**Static agent** (no LLM): ```python def generate(spec: dict) -> bytes: - """ - Takes the spec dict (load, bolt pattern, build volume, material). - Returns STEP file bytes for your design. 
- """ + """Takes the spec dict, returns STEP file bytes.""" ... ``` -See `agents/taper-beam/agent.py` for a clean I-beam reference implementation (~38g). -See `agents/lean-arm/agent.py` for the I-beam baseline (~32g). -See `agents/pocket-plate/agent.py` for the wall-pocketing approach (~30g). -See `agents/compact-arm/agent.py` for the current SOTA (~27g). +**LLM agent** (recommended — harness injects the client): +```python +from forge.sdk.llm import LLMClient + +def generate(spec: dict, llm: LLMClient) -> bytes: + """Use the LLM to reason about geometry, then return STEP bytes.""" + response = llm.chat([{"role": "user", "content": "..."}]) + ... +``` + +No API key needed in CI — the harness injects `LLMClient` automatically using whitelisted models. For local `forge eval` runs, export `FORGE_LLM_KEY` first; `LLMClient.chat` raises if it is unset. See `examples/llm-agent/agent.py` for a complete working example. + +Reference implementations in `agents/`: +- `taper-beam/` — clean I-beam (~38g) +- `lean-arm/` — I-beam baseline (~32g) +- `compact-arm/` — pocketed arm approach --- diff --git a/README.md b/README.md index f38f8c1..a576458 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ forge eval agents/<your-agent>/agent.py ## Submitting 1. Fork this repo. -2. Create `agents/<your-agent>/agent.py` with a `generate(spec: dict) -> bytes` function. +2. Create `agents/<your-agent>/agent.py` with a `generate(spec, [llm]) -> bytes` function. 3. Open a PR. CI scores your design automatically (~2 min) and posts: ``` ## Forge Eval — NEW LEADER 🏆 @@ -77,18 +77,30 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines. ## Agent interface +Two supported signatures — the harness detects which one you use automatically: + +**Static agent** (no LLM): ```python def generate(spec: dict) -> bytes: - """ - Takes the spec dict (load, bolt pattern, build volume, material). - Returns STEP file bytes for your design. + """Build and return STEP file bytes for the given spec.""" + ... +``` - Sandbox: 60s timeout, 4GB RAM, no network access. 
- """ +**LLM agent** (recommended): +```python +from forge.sdk.llm import LLMClient + +def generate(spec: dict, llm: LLMClient) -> bytes: + """Use the LLM to reason about geometry, then return STEP bytes.""" + response = llm.chat([{"role": "user", "content": "..."}]) ... ``` -Libraries available in eval: `build123d`, `OCP`, `gmsh`, `numpy`, `scipy`. See agents/ for reference implementations. +The harness injects `LLMClient` automatically — no API key required. Whitelisted models: `anthropic/claude-haiku-4-5`, `anthropic/claude-3-5-haiku`, `openai/gpt-4o-mini`. See `examples/llm-agent/` for a complete working example. + +Sandbox constraints: **60s timeout · 4 GB RAM · network enabled (intended for LLM calls only; egress is not otherwise restricted)** + +Libraries available: `build123d`, `OCP`, `gmsh`, `numpy`, `scipy`, `httpx`. See `agents/` for reference implementations. --- @@ -135,11 +147,12 @@ All CPU. No GPU required. Live: http://143.244.191.193:8000/sota -| Spec | Score | Agent | FEA Stress | -|---|---|---|---| -| 001 Wall Bracket | **27.22 g** | compact-arm | 13.8 / 25.0 MPa | -| 002 Equipment Mount | — | — | — | -| 003 Pipe-Clamp | 2799.52 g | baseline_steel | 22.18 / 82.0 MPa | +| Spec | Score | Agent | +|---|---|---| +| spec-001 Wall Bracket | **23.48 g** | sub-nano | +| spec-002 Equipment Mount | **25.84 g** | al-bracket-v19 | +| spec-003 Pipe-Clamp | **71.42 g** | ss-bracket-v15 | +| pub_001 – pub_005 | see leaderboard | various | --- diff --git a/agents/template/agent.py b/agents/template/agent.py index 0bb7c38..d7abfdb 100644 --- a/agents/template/agent.py +++ b/agents/template/agent.py @@ -1,17 +1,23 @@ """ Template agent — start here. -Contract: implement generate(spec) -> bytes (STEP file). +Two supported signatures: -The eval harness calls generate() with the spec dict for each problem. -Return valid STEP bytes. The harness handles geometry checks and FEA; -your job is to return the lightest part that passes all constraints. 
+ generate(spec: dict) -> bytes # static agent + generate(spec: dict, llm: LLMClient) -> bytes # LLM agent (recommended) -See QUICKSTART.md for a full walkthrough. +The harness detects which you use via inspect.signature and injects LLMClient +automatically if present — no API key required from you. + +See QUICKSTART.md for a full walkthrough and examples/llm-agent/ for an +LLM agent example. """ from __future__ import annotations +# To use the LLM client, uncomment: +# from forge.sdk.llm import LLMClient + # TODO: import your geometry library # from build123d import ... # recommended # from OCP.BRepPrimAPI import ... # raw OCP (see agents/baseline/) @@ -21,27 +27,27 @@ def generate(spec: dict) -> bytes: """ Build and return a STEP file for the given spec. + To use an LLM, change the signature to: generate(spec, llm: LLMClient) + Args: - spec: Problem specification dict. Key structure: - spec["constraints"]["load_n"] — applied load in Newtons - spec["constraints"]["load_point_mm"] — [x, y, z] load application point - spec["constraints"]["build_volume_mm"] — [x, y, z] max bounding box - spec["constraints"]["bolt_pattern_mm"] — [[y, z], ...] bolt hole centers (x=0 plane) - spec["constraints"]["bolt_diameter_clearance_mm"] — minimum clearance diameter - spec["constraints"]["min_wall_thickness_mm"] — minimum feature wall - spec["constraints"]["max_overhang_deg"] — max overhang from vertical - spec["material"] — material name (see benchmark/materials.py) + spec: Problem specification dict. Key fields: + spec["constraints"]["load_n"] — load in Newtons + spec["constraints"]["load_point_mm"] — [x, y, z] load point + spec["constraints"]["build_volume_mm"] — [x, y, z] bounding box + spec["constraints"]["bolt_pattern_mm"] — [[y, z], ...] 
bolt centers + spec["constraints"]["bolt_diameter_clearance_mm"] — hole clearance + spec["constraints"]["min_wall_thickness_mm"] — minimum wall + spec["constraints"]["max_overhang_deg"] — max printable overhang + spec["material"] — material name spec["safety_factor"] — FEA stress safety factor + spec["scoring"]["metric"] — "mass_grams" | "volume_mm3" | ... Returns: - STEP file as raw bytes. Must be valid AP214IS STEP. + STEP file as raw bytes (AP214IS schema required). Notes: - - Must be deterministic: same spec → same bytes every call. - If you use any randomness, fix the seed (e.g. random.seed(42)). - - The FEA mesh uses C3D4 linear tets at ~2 mm characteristic length. - Avoid features thinner than 3 mm — they produce degenerate elements. - - Lower mass = better score. There is no ceiling; keep optimizing. + - Must be deterministic: same spec → same bytes. Fix any random seeds. + - Avoid features thinner than 3 mm — they produce degenerate FEA elements. """ constraints = spec["constraints"] From 132a9e362f4b0942caf2a7e5df45f161c6daa588 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 18:32:57 +0000 Subject: [PATCH 8/9] Skip template agent in eval CI --- .github/workflows/eval.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index 66a67f6..8d9c9a8 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -21,7 +21,7 @@ jobs: id: agent run: | AGENT=$(git diff --name-only origin/${{ github.base_ref }}...HEAD \ - | grep '^agents/.*/agent\.py$' | head -1) + | grep '^agents/.*/agent\.py$' | grep -v '^agents/template/' | head -1) if [ -z "$AGENT" ]; then echo "No agent.py changed — skipping eval." 
echo "found=false" >> "$GITHUB_OUTPUT" From c946e730491575f578ba0d907072b674b2ccc584 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 18:35:01 +0000 Subject: [PATCH 9/9] Update PR template for LLM agents and forge CLI --- .github/PULL_REQUEST_TEMPLATE.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 283cd0e..88f42ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -4,7 +4,7 @@ ## Spec - + ## Score @@ -12,11 +12,12 @@ ## Approach - + ## Checklist -- [ ] `agents/<your-agent>/agent.py` implements `generate(spec) -> bytes` -- [ ] Local eval passes: `docker run ... --agent agents/<your-agent>/agent.py --spec specs/001_bracket.json` -- [ ] No external network calls in `generate()` -- [ ] Agent is deterministic (same output for same spec) +- [ ] `agents/<your-agent>/agent.py` implements `generate(spec) -> bytes` or `generate(spec, llm) -> bytes` +- [ ] `agents/<your-agent>/spec.txt` contains the target spec ID (e.g., `pub_001_medium`) +- [ ] Local eval passes: `forge eval agents/<your-agent>/agent.py` +- [ ] Agent is deterministic (same spec → same bytes; fix any random seeds) +- [ ] LLM agents: using an injected `LLMClient`, not a hardcoded API key