From b3b8a9039ba5dc8a1211c50d44e562f0098c169b Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 23:23:11 +0000 Subject: [PATCH 1/4] Add eval-verify agent to test multi-spec pool CI across all 3 categories --- agents/eval-verify/agent.py | 77 +++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 agents/eval-verify/agent.py diff --git a/agents/eval-verify/agent.py b/agents/eval-verify/agent.py new file mode 100644 index 0000000..439d612 --- /dev/null +++ b/agents/eval-verify/agent.py @@ -0,0 +1,77 @@ +""" +Pipeline verification agent — intentionally simple L-bracket. + +Purpose: verify the multi-spec pool eval works for all 3 categories +(mass, stiffness-to-weight, deflection) end-to-end. Not meant to compete; +this agent should be closed without merging after CI passes. + +Score: ~165g. Miners beat this by removing material where stress is low. +The bracket has a vertical mounting plate (bolt holes), a horizontal shelf +(reaches the load point), and no topology optimization whatsoever. +""" + +from __future__ import annotations + +import os +import tempfile + + +def generate(spec: dict) -> bytes: + """Build a parametric L-bracket and return STEP bytes.""" + from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse + from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder + from OCP.Interface import Interface_Static + from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer + from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt + + constraints = spec["constraints"] + bolt_pattern = constraints["bolt_pattern_mm"] + bolt_d = constraints["bolt_diameter_clearance_mm"] + + by_coords = [p[0] for p in bolt_pattern] + bz_coords = [p[1] for p in bolt_pattern] + plate_y = max(by_coords) + 20.0 + plate_z = max(bz_coords) + 20.0 + plate_thickness = 10.0 + + shelf_length = constraints["load_point_mm"][0] + 15.0 + shelf_thickness = 12.0 + shelf_z = plate_z + + # Mounting plate + plate = BRepPrimAPI_MakeBox( + gp_Pnt(0.0, 0.0, 0.0), + gp_Pnt(plate_thickness, plate_y, plate_z), + ).Shape() + + # Horizontal shelf + shelf = BRepPrimAPI_MakeBox( + gp_Pnt(0.0, 0.0, 0.0), + gp_Pnt(shelf_length, shelf_z, shelf_thickness), + ).Shape() + + fused = BRepAlgoAPI_Fuse(plate, shelf) + fused.Build() + body = fused.Shape() + + # Bolt holes through mounting plate + for by, bz in bolt_pattern: + axis = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1.0, 0.0, 0.0)) + hole = BRepPrimAPI_MakeCylinder(axis, bolt_d / 2, plate_thickness + 2.0).Shape() + cut = BRepAlgoAPI_Cut(body, hole) + cut.Build() + body = cut.Shape() + + # Write STEP + writer = STEPControl_Writer() + Interface_Static.SetCVal_s("write.step.schema", "AP214IS") + writer.Transfer(body, STEPControl_AsIs) + + with tempfile.NamedTemporaryFile(suffix=".step", delete=False) as f: + path = f.name + try: + writer.Write(path) + with open(path, "rb") as f: + return f.read() + finally: + os.unlink(path) From fd8510cf339fe19bf0693d8a509e121067162884 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 23:27:20 +0000 Subject: [PATCH 2/4] Fix eval-verify agent to fit within any spec build volume --- agents/eval-verify/agent.py | 59 +++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/agents/eval-verify/agent.py b/agents/eval-verify/agent.py index 439d612..d97b133 100644 --- a/agents/eval-verify/agent.py +++ b/agents/eval-verify/agent.py @@ -1,13 +1,11 @@ """ -Pipeline verification agent — intentionally simple L-bracket. +Pipeline verification agent — adaptive L-bracket for all 3 rounds. -Purpose: verify the multi-spec pool eval works for all 3 categories -(mass, stiffness-to-weight, deflection) end-to-end. Not meant to compete; -this agent should be closed without merging after CI passes. +Generates a valid bracket for any spec in rounds 1-3. Not optimized +for any metric — purpose is to verify the multi-spec pool eval and CI +pipeline work correctly for mass, stiffness-to-weight, and deflection. -Score: ~165g. Miners beat this by removing material where stress is low. -The bracket has a vertical mounting plate (bolt holes), a horizontal shelf -(reaches the load point), and no topology optimization whatsoever. +Close this PR without merging after CI passes all 3 categories. """ from __future__ import annotations @@ -17,47 +15,58 @@ def generate(spec: dict) -> bytes: - """Build a parametric L-bracket and return STEP bytes.""" + """Build a parametric L-bracket that fits any spec's constraints.""" from OCP.BRepAlgoAPI import BRepAlgoAPI_Cut, BRepAlgoAPI_Fuse from OCP.BRepPrimAPI import BRepPrimAPI_MakeBox, BRepPrimAPI_MakeCylinder from OCP.Interface import Interface_Static from OCP.STEPControl import STEPControl_AsIs, STEPControl_Writer from OCP.gp import gp_Ax2, gp_Dir, gp_Pnt - constraints = spec["constraints"] - bolt_pattern = constraints["bolt_pattern_mm"] - bolt_d = constraints["bolt_diameter_clearance_mm"] + c = spec["constraints"] + bv = c["build_volume_mm"] # [x, y, z] max extents + bolts = c["bolt_pattern_mm"] # [[y, z], ...] on mount face + bolt_d = c["bolt_diameter_clearance_mm"] + load_pt = c["load_point_mm"] # [x, y, z] - by_coords = [p[0] for p in bolt_pattern] - bz_coords = [p[1] for p in bolt_pattern] - plate_y = max(by_coords) + 20.0 - plate_z = max(bz_coords) + 20.0 - plate_thickness = 10.0 + bvx, bvy, bvz = bv[0], bv[1], bv[2] - shelf_length = constraints["load_point_mm"][0] + 15.0 - shelf_thickness = 12.0 - shelf_z = plate_z + # Bolt extents — clamp plate dimensions to build volume + bolt_ys = [b[0] for b in bolts] + bolt_zs = [b[1] for b in bolts] + by_max = max(bolt_ys) + bz_max = max(bolt_zs) + + plate_t = min(12.0, bvx * 0.08) + plate_y = min(bvy - 2.0, by_max + 12.0) # clamped to build volume Y + plate_z = min(bvz - 2.0, bz_max + 12.0) # clamped to build volume Z + + # Shelf: extends from plate to past load point, stays within build volume + shelf_len = min(bvx - plate_t - 2.0, load_pt[0] + 10.0) + shelf_h = min(plate_z, bvz * 0.8) + shelf_t = min(15.0, bvz * 0.15, plate_z * 0.2) # Mounting plate plate = BRepPrimAPI_MakeBox( gp_Pnt(0.0, 0.0, 0.0), - gp_Pnt(plate_thickness, plate_y, plate_z), + gp_Pnt(plate_t, plate_y, plate_z), ).Shape() # Horizontal shelf shelf = BRepPrimAPI_MakeBox( gp_Pnt(0.0, 0.0, 0.0), - gp_Pnt(shelf_length, shelf_z, shelf_thickness), + gp_Pnt(plate_t + shelf_len, plate_y, shelf_t), ).Shape() fused = BRepAlgoAPI_Fuse(plate, shelf) fused.Build() body = fused.Shape() - # Bolt holes through mounting plate - for by, bz in bolt_pattern: - axis = gp_Ax2(gp_Pnt(-1.0, by, bz), gp_Dir(1.0, 0.0, 0.0)) - hole = BRepPrimAPI_MakeCylinder(axis, bolt_d / 2, plate_thickness + 2.0).Shape() + # Bolt clearance holes through mounting plate + for by, bz in bolts: + by_c = min(max(by, bolt_d / 2 + 1), plate_y - bolt_d / 2 - 1) + bz_c = min(max(bz, bolt_d / 2 + 1), plate_z - bolt_d / 2 - 1) + axis = gp_Ax2(gp_Pnt(-1.0, by_c, bz_c), gp_Dir(1.0, 0.0, 0.0)) + hole = BRepPrimAPI_MakeCylinder(axis, bolt_d / 2, plate_t + 2.0).Shape() cut = BRepAlgoAPI_Cut(body, hole) cut.Build() body = cut.Shape() From e05dcc4a26bcca80b9b80f7dc54620b154064ad4 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 23:27:58 +0000 Subject: [PATCH 3/4] Center arm at load point Z to ensure FEA load node coverage --- agents/eval-verify/agent.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/agents/eval-verify/agent.py b/agents/eval-verify/agent.py index d97b133..e9ad8b8 100644 --- a/agents/eval-verify/agent.py +++ b/agents/eval-verify/agent.py @@ -30,20 +30,32 @@ def generate(spec: dict) -> bytes: bvx, bvy, bvz = bv[0], bv[1], bv[2] - # Bolt extents — clamp plate dimensions to build volume + # Bolt extents bolt_ys = [b[0] for b in bolts] bolt_zs = [b[1] for b in bolts] by_max = max(bolt_ys) bz_max = max(bolt_zs) + # Plate dimensions — clamped to build volume plate_t = min(12.0, bvx * 0.08) - plate_y = min(bvy - 2.0, by_max + 12.0) # clamped to build volume Y - plate_z = min(bvz - 2.0, bz_max + 12.0) # clamped to build volume Z + plate_y = min(bvy - 2.0, by_max + 12.0) + plate_z = min(bvz - 2.0, bz_max + 12.0) - # Shelf: extends from plate to past load point, stays within build volume - shelf_len = min(bvx - plate_t - 2.0, load_pt[0] + 10.0) - shelf_h = min(plate_z, bvz * 0.8) - shelf_t = min(15.0, bvz * 0.15, plate_z * 0.2) + # Arm: extends from plate to past load point in X, centered on load point Y/Z + arm_len = min(bvx - plate_t - 2.0, load_pt[0] + 10.0) + arm_w = min(plate_y * 0.8, bvy * 0.7) # arm width in Y + arm_h = min(plate_z * 0.5, bvz * 0.4) # arm height in Z + + # Center the arm vertically around the load point Z + arm_z_lo = max(0.0, load_pt[2] - arm_h / 2) + arm_z_hi = arm_z_lo + arm_h + if arm_z_hi > bvz - 2.0: + arm_z_hi = bvz - 2.0 + arm_z_lo = max(0.0, arm_z_hi - arm_h) + + # Center arm in Y around load point Y (clamped to plate width) + arm_y_lo = max(0.0, min(load_pt[1] - arm_w / 2, plate_y - arm_w)) + arm_y_hi = arm_y_lo + arm_w # Mounting plate plate = BRepPrimAPI_MakeBox( @@ -51,13 +63,13 @@ def generate(spec: dict) -> bytes: gp_Pnt(plate_t, plate_y, plate_z), ).Shape() - # Horizontal shelf - shelf = BRepPrimAPI_MakeBox( - gp_Pnt(0.0, 0.0, 0.0), - gp_Pnt(plate_t + shelf_len, plate_y, shelf_t), + # Arm + arm = BRepPrimAPI_MakeBox( + gp_Pnt(plate_t, arm_y_lo, arm_z_lo), + gp_Pnt(plate_t + arm_len, arm_y_hi, arm_z_hi), ).Shape() - fused = BRepAlgoAPI_Fuse(plate, shelf) + fused = BRepAlgoAPI_Fuse(plate, arm) fused.Build() body = fused.Shape() From 7d47846148bbd2f93b3562ab565513bc7382e6a3 Mon Sep 17 00:00:00 2001 From: Punch Date: Tue, 2 Jun 2026 23:33:39 +0000 Subject: [PATCH 4/4] Improve pool eval error reporting: print Docker stderr on empty output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also bump pids-limit 256→512 — OCP shape ops can spawn many threads. Co-Authored-By: Claude Sonnet 4.6 --- scripts/run_eval_pool.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/run_eval_pool.py b/scripts/run_eval_pool.py index ad054da..1b68748 100644 --- a/scripts/run_eval_pool.py +++ b/scripts/run_eval_pool.py @@ -48,7 +48,7 @@ "docker", "run", "--rm", "--security-opt", "no-new-privileges", "--cap-drop", "ALL", - "--pids-limit", "256", + "--pids-limit", "512", "--memory", "4g", "--cpus", "2", "-e", f"FORGE_LLM_KEY={llm_key}", @@ -63,14 +63,24 @@ proc = subprocess.run(cmd, capture_output=True, text=True) out = proc.stdout.strip() + err = proc.stderr.strip() print(f"[{spec_id}] run {run_i + 1}/{runs}: {out}", flush=True) + if not out and err: + print(f"[{spec_id}] stderr (exit {proc.returncode}): {err[:500]}", flush=True) + elif proc.returncode != 0 and not out: + print(f"[{spec_id}] docker exited {proc.returncode} with no output", flush=True) try: result_data = json.loads(out) except (json.JSONDecodeError, ValueError): + reason = f"Invalid JSON output: {out[:120]}" + if not out and err: + reason = f"Docker error (exit {proc.returncode}): {err[:300]}" + elif not out: + reason = f"No output from docker (exit {proc.returncode})" result_data = { "passed": False, - "reason": f"Invalid JSON output: {out[:120]}", + "reason": reason, } # Check determinism on first spec