From 29aab0e189ad075f456def8851cc45d99fe78f87 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:14:16 +0000
Subject: [PATCH 1/6] Clamp baseline geometry to build volume, cover load point
 Z

---
 agents/baseline/agent.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/agents/baseline/agent.py b/agents/baseline/agent.py
index c7e0cc9..9c5a396 100644
--- a/agents/baseline/agent.py
+++ b/agents/baseline/agent.py
@@ -29,15 +29,19 @@ def generate(spec: dict, llm: LLMClient) -> bytes:  # noqa: ARG001
     constraints = spec["constraints"]
     bolt_pattern = constraints["bolt_pattern_mm"]
     bolt_d = constraints["bolt_diameter_clearance_mm"]
+    bv = constraints["build_volume_mm"]  # [x, y, z] limits
+    load_pt = constraints["load_point_mm"]
 
     by_coords = [p[0] for p in bolt_pattern]
     bz_coords = [p[1] for p in bolt_pattern]
-    plate_y = max(by_coords) + 20.0
-    plate_z = max(bz_coords) + 20.0
+    # Clamp to build volume so the bracket always fits
+    plate_y = min(max(by_coords) + 20.0, bv[1])
+    plate_z = min(max(bz_coords) + 20.0, bv[2])
     plate_thickness = 10.0
 
-    shelf_length = constraints["load_point_mm"][0] + 15.0
-    shelf_thickness = 12.0
+    # Shelf must reach the load point in X and cover its Z coordinate
+    shelf_length = min(load_pt[0] + 15.0, bv[0])
+    shelf_thickness = min(max(15.0, load_pt[2] + 10.0), bv[2])
     shelf_z = plate_z
 
     # Mounting plate

From df509dbe0ddaec0e97d8ccb65e124291298d09a1 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:19:35 +0000
Subject: [PATCH 2/6] Leave 2mm margin from build volume boundary in baseline

---
 agents/baseline/agent.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/agents/baseline/agent.py b/agents/baseline/agent.py
index 9c5a396..687c7b2 100644
--- a/agents/baseline/agent.py
+++ b/agents/baseline/agent.py
@@ -34,14 +34,14 @@ def generate(spec: dict, llm: LLMClient) -> bytes:  # noqa: ARG001
 
     by_coords = [p[0] for p in bolt_pattern]
     bz_coords = [p[1] for p in bolt_pattern]
-    # Clamp to build volume so the bracket always fits
-    plate_y = min(max(by_coords) + 20.0, bv[1])
-    plate_z = min(max(bz_coords) + 20.0, bv[2])
+    # Clamp to build volume — leave 2mm margin to avoid floating-point boundary failures
+    plate_y = min(max(by_coords) + 20.0, bv[1] - 2.0)
+    plate_z = min(max(bz_coords) + 20.0, bv[2] - 2.0)
     plate_thickness = 10.0
 
     # Shelf must reach the load point in X and cover its Z coordinate
-    shelf_length = min(load_pt[0] + 15.0, bv[0])
-    shelf_thickness = min(max(15.0, load_pt[2] + 10.0), bv[2])
+    shelf_length = min(load_pt[0] + 15.0, bv[0] - 2.0)
+    shelf_thickness = min(max(15.0, load_pt[2] + 10.0), bv[2] - 2.0)
     shelf_z = plate_z
 
     # Mounting plate

From 78b6193996b510d2145a887ddeb5be7b06781d95 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:24:09 +0000
Subject: [PATCH 3/6] Fix model IDs in CONTRIBUTING, add llm-agent to examples
 list

---
 CONTRIBUTING.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 07b588d..dfc6b96 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -28,10 +28,11 @@ def generate(spec: dict, llm: LLMClient) -> bytes:
     ...
 ```
 
-The harness injects `LLMClient` — no API key required. Whitelisted models: `claude-haiku-4-5`, `claude-3-5-haiku`, `gpt-4o-mini`. Both parameters are required; agents that omit `llm` are rejected at eval time.
+The harness injects `LLMClient` — no API key required. Whitelisted models: `anthropic/claude-haiku-4-5`, `anthropic/claude-3-5-haiku`, `openai/gpt-4o-mini`. Both parameters are required; agents that omit `llm` are rejected at eval time.
 
 Starting point in `examples/`:
-- `metric-aware-agent/` — adapts strategy per scoring metric, call `llm.chat()` to reason about geometry
+- `metric-aware-agent/` — adapts strategy per scoring metric; uses `llm.chat()` to reason about geometry
+- `llm-agent/` — minimal single-call LLM agent, good for getting started
 
 The agent runs inside a Docker container:
 - **Time:** 60 seconds | **Memory:** 4 GB | **Network:** enabled (LLM calls only)

From ca1a7c69a63112626ed4e6ee69e8368682b3e731 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:25:21 +0000
Subject: [PATCH 4/6] Serialize score.yml to prevent concurrent DB-write
 overload

---
 .github/workflows/score.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/score.yml b/.github/workflows/score.yml
index 7e78bfa..14c5799 100644
--- a/.github/workflows/score.yml
+++ b/.github/workflows/score.yml
@@ -12,6 +12,12 @@ on:
 env:
   FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
 
+# Serialize full-score jobs to avoid concurrent DB writes overwhelming the API.
+# In-progress runs are never cancelled. NOTE: GitHub keeps at most one *pending* run per group (a newer queued run replaces it) — confirm every merged agent still gets scored.
+concurrency:
+  group: score-main
+  cancel-in-progress: false
+
 jobs:
   find-agent:
     runs-on: ubuntu-latest

From 97b274c1863c140a85e7bd433326c81120d1459e Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:26:54 +0000
Subject: [PATCH 5/6] Add session-2 changelog entries

---
 CHANGELOG.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5090d60..46824e2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,15 @@ Format: newest entries first.
 
 ## [Unreleased]
 
+## 2026-06-03 (session 2)
+
+### Fixed
+- **Baseline agent build-volume overflow** (PR #214, `agents/baseline/agent.py`): naive L-bracket used `max_bolt_coord + 20mm` for plate dimensions, exceeding build volume on some specs; also `shelf_thickness=12mm` fixed regardless of spec load-point Z, causing "too few nodes near load point" FEA failure. Fixed: all dimensions clamped to build volume with 2mm margin; shelf height computed from `load_point_mm[2]`.
+- **CONTRIBUTING model IDs** (PR #215): model IDs were shown without provider prefix (`claude-haiku-4-5` vs `anthropic/claude-haiku-4-5`); corrected to match `FORGE_MODEL_WHITELIST` format. Added `llm-agent/` to examples list.
+
+### Changed
+- **score.yml serialization** (PR #216, `.github/workflows/score.yml`): add `concurrency: group: score-main, cancel-in-progress: false` to serialize post-merge full scoring runs. Prevents concurrent SQLite writes overwhelming forge-api at high miner volume.
+
 ## 2026-06-03
 
 ### Added

From ed1206ebe3f671a5e2f4c0498de39e22b6bd75f5 Mon Sep 17 00:00:00 2001
From: Punch <punch@punchthedev.ai>
Date: Wed, 3 Jun 2026 13:42:45 +0000
Subject: [PATCH 6/6] Harden CI eval: crash detection, empty STEP guard,
 extended timeout
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three independent fixes identified in scale readiness audit:

1. run_eval_pool.py: distinguish container crash (returncode != 0, no output)
   from bad JSON (container ran but output is garbage). Previously both
   showed "Invalid JSON output" — a crash now shows "Container exited <code>"
   (e.g. 137) with the stderr tail, making OOM kills and segfaults debuggable by miners.

2. record_submissions.py: skip STEP files of 200 bytes or fewer.
   The file is pre-created as 0 bytes before docker run so the container
   can write to it; if the container crashes mid-run the file stays empty.
   Storing an empty BLOB sets has_step=true for a submission with no
   geometry, breaking the 3D viewer for that entry.

3. score.yml: increase score-round timeout-minutes from 90 → 150.
   15 specs × ~180s each + Docker overhead ≈ 50 min per round; 90 min
   was dangerously close to the limit for slower specs under high load.
   eval.yml and hidden-eval remain at 90 min (3 specs each — sufficient).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/score.yml   |  2 +-
 scripts/record_submissions.py |  5 ++++-
 scripts/run_eval_pool.py      | 20 ++++++++++++++------
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/score.yml b/.github/workflows/score.yml
index 14c5799..45d9777 100644
--- a/.github/workflows/score.yml
+++ b/.github/workflows/score.yml
@@ -58,7 +58,7 @@ jobs:
     needs: find-agent
     if: needs.find-agent.outputs.found == 'true'
     runs-on: ubuntu-latest
-    timeout-minutes: 90
+    timeout-minutes: 150
     strategy:
       fail-fast: false
       matrix:
diff --git a/scripts/record_submissions.py b/scripts/record_submissions.py
index 61e935c..f542f53 100644
--- a/scripts/record_submissions.py
+++ b/scripts/record_submissions.py
@@ -35,7 +35,10 @@
     step_b64 = None
     step_file = Path(f".forge_step_{spec_id}.step")
     if step_file.exists():
-        step_b64 = base64.b64encode(step_file.read_bytes()).decode()
+        # Only include STEP files larger than 200 bytes; a 0-byte file means the container
+        # crashed before writing geometry. 200 bytes is well below the size of any valid STEP file.
+        if step_file.stat().st_size > 200:
+            step_b64 = base64.b64encode(step_file.read_bytes()).decode()
         step_file.unlink(missing_ok=True)
 
     score_metric = result.get("score_metric", entry.get("metric", "mass_grams"))
diff --git a/scripts/run_eval_pool.py b/scripts/run_eval_pool.py
index 7b699d7..641f16e 100644
--- a/scripts/run_eval_pool.py
+++ b/scripts/run_eval_pool.py
@@ -72,16 +72,24 @@
         if proc.stderr.strip():
             print(f"[{spec_id}] stderr:\n{proc.stderr.strip()}", flush=True)
 
-        try:
-            result_data = json.loads(out)
-        except (json.JSONDecodeError, ValueError):
-            # Surface the tail of stderr so miners can debug crashes.
-            stderr_tail = proc.stderr.strip()[-400:] if proc.stderr.strip() else ""
+        stderr_tail = proc.stderr.strip()[-400:] if proc.stderr.strip() else ""
+        if proc.returncode != 0 and not out:
+            # Container crashed before producing any output (OOM, segfault, etc.)
             hint = f" | stderr: {stderr_tail}" if stderr_tail else ""
             result_data = {
                 "passed": False,
-                "reason": f"Invalid JSON output: {out[:120]}{hint}",
+                "reason": f"Container exited {proc.returncode}{hint}",
             }
+        else:
+            try:
+                result_data = json.loads(out)
+            except (json.JSONDecodeError, ValueError):
+                # Surface the tail of stderr so miners can debug crashes.
+                hint = f" | stderr: {stderr_tail}" if stderr_tail else ""
+                result_data = {
+                    "passed": False,
+                    "reason": f"Invalid JSON output: {out[:120]}{hint}",
+                }
 
         # Check determinism on first spec
         if prev_score is not None: