alanshurafa · alanshurafa · May 31, 2026 · May 31, 2026 · Jun 1, 2026 · chatgpt-codex-connector
diff --git a/README.md b/README.md
@@ -23,6 +23,10 @@ bash ./co-evolve-bouncer.sh --vanilla --bounce-only docs/plan.md
 Use `dev-review` only when you want a code-focused compose -> bounce -> execute
 -> verify workflow.
 
+## Prerequisites
+
+The core co-evolve flow needs only the `claude` and `codex` CLIs. The optional eval harness (`evals/`) and PEL lab (`lab/pel/`) also need the mikefarah/Go `yq` (v4+), not the Python `yq` from Debian/Ubuntu's `apt install yq`, which is incompatible. Those components fail fast if the wrong `yq` is on `PATH`.
+
 ## Install On macOS/Linux
 
 From the cloned repo:

diff --git a/co-evolve-bouncer.sh b/co-evolve-bouncer.sh
@@ -64,6 +64,7 @@ Options:
   --chain            Use staged passes: critique -> defend -> tighten
   --bounces N        Max bounce passes (default: 2, ignored with --chain)
   --agents A,B       Agent pair (default: claude,codex)
+  --claude-model M   Override the Claude model (default: claude-opus-4-6; also via CLAUDE_MODEL env)
   --dev-review       Add execute + verify phases after bounce
   --bounce-only      Skip compose, bounce a file directly
   --output FILE      Write final output to a file instead of stdout
@@ -107,6 +108,11 @@ while [[ $# -gt 0 ]]; do
       [[ -z "$AGENT_A" || -z "$AGENT_B" ]] && die "--agents requires exactly two agents separated by comma (e.g., claude,codex)"
       shift 2
       ;;
+    --claude-model)
+      [[ $# -gt 1 ]] || die "--claude-model requires a value"
+      CLAUDE_MODEL="$2"
+      shift 2
+      ;;
     --dev-review) die "--dev-review is not yet implemented. Use dev-review/codex/dev-review.sh directly." ;;
     --bounce-only) BOUNCE_ONLY=true; shift ;;
     --output) OUTPUT_FILE="$2"; shift 2 ;;

diff --git a/evals/README.md b/evals/README.md
@@ -71,6 +71,8 @@ bash evals/tests/scorer-verification.sh
 
 Both are single-binary dependencies that Just Work on every supported platform.
 
+> Debian/Ubuntu's `apt install yq` installs the *Python* yq, which is not compatible with the mikefarah v4 syntax used here. The harness rejects it at startup with a clear message; install the mikefarah binary via the `go install` line above or from its releases page.
+
 ### Verification
 
 - **Tier 1 (golden-fixture regression):** `bash evals/tests/scorer-verification.sh` asserts the Bash scorer reproduces PS-produced `EXPECTED.json` outputs for all 10 fixture suites under `runners/codex-ps/evals/tests/fixtures/`.

diff --git a/evals/lib/co-evolution-evals.sh b/evals/lib/co-evolution-evals.sh
@@ -35,7 +35,8 @@ if ! declare -F die >/dev/null 2>&1; then
   die() {
     local message="${1:-Fatal error}"
     log "ERROR: $message"
-    exit 1
+    # F-6: honor an optional exit-code (2nd arg); default to 1 when omitted.
+    exit "${2:-1}"
   }
 fi
 
@@ -45,7 +46,9 @@ fi
 # ---------------------------------------------------------------------------
 
 ensure_yq() {
-  command -v yq >/dev/null 2>&1 || die "yq not found. Install mikefarah/yq: 'scoop install yq' (Windows), 'brew install yq' (macOS), 'apt install yq' (Linux go-install)."
+  # F-1: reject the python yq. Use the shared guard from lib/co-evolution.sh when it is in scope; otherwise run the same --version check inline so this lib also works when sourced alone.
+  if declare -F require_mikefarah_yq >/dev/null 2>&1; then require_mikefarah_yq; return; fi
+  if ! command -v yq >/dev/null 2>&1 || ! yq --version 2>&1 | grep -qi mikefarah; then die "mikefarah/yq (Go yq v4+) required; the python 'yq' is not compatible. Install: scoop install yq (Windows), brew install yq (macOS), or the binary from https://github.com/mikefarah/yq."; fi
 }
 
 ensure_jq() {

diff --git a/lab/pel/README.md b/lab/pel/README.md
@@ -20,6 +20,10 @@ isolation, canary smoke-test, and diff budget + allowlist enforcement (see
 via `bash lab/pel/classifier/classifier.sh` for debugging and for the Phase 4
 Plan 02 simulation test.
 
+## Prerequisites
+
+The policy-tier proposer and PR emitter shell out to `yq` for YAML mutations. It must be the mikefarah/Go `yq` (v4+), not the Python `yq` that Debian/Ubuntu's `apt install yq` provides; the two are incompatible. PEL now fails fast if the wrong one is on `PATH`. Install with `scoop install yq` (Windows), `brew install yq` (macOS), or `go install github.com/mikefarah/yq/v4@latest`.
+
 ## Env-var contract (v1.2)
 
 Callers (future Phases 5-8 proposers) MUST `export` the PEL_* variables explicitly

diff --git a/lab/pel/pr-emitter/pr-emitter.sh b/lab/pel/pr-emitter/pr-emitter.sh
@@ -567,8 +567,9 @@ if [[ "$CANARY_FAILED_MODE" == "false" ]]; then
       ;;
     policy)
       policy_sandbox_path="$EMITTER_SANDBOX/$TARGET"
-      if ! command -v yq >/dev/null 2>&1; then
-        die "yq required for policy-tier mutation apply (install mikefarah/yq v4+)" 2
+      # F-1: require mikefarah/Go yq v4 (the python yq is not compatible).
+      if ! command -v yq >/dev/null 2>&1 || ! yq --version 2>&1 | grep -qi mikefarah; then
+        die "mikefarah/yq (Go yq v4+) required for policy-tier mutation apply; the python 'yq' is not compatible (install from https://github.com/mikefarah/yq)" 2
       fi
       # Iterate the mutations array, applying each key=new pair via yq -i.
       # Process substitution keeps the loop in the parent shell so `die` exits

diff --git a/lab/pel/proposer/policy/proposer.sh b/lab/pel/proposer/policy/proposer.sh
@@ -41,8 +41,11 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
 require_tools() {
   command -v jq >/dev/null 2>&1 \
     || { echo "ERROR: jq is required. Install: scoop install jq (Windows), brew install jq (macOS), apt install jq (Linux)." >&2; exit 2; }
-  command -v yq >/dev/null 2>&1 \
-    || { echo "ERROR: yq (mikefarah/Go yq v4+) is required. Install: scoop install yq (Windows), brew install yq (macOS), or see https://github.com/mikefarah/yq." >&2; exit 2; }
+  # F-1: reject the python yq (apt) -- require mikefarah/Go yq v4 by its --version.
+  if ! command -v yq >/dev/null 2>&1 || ! yq --version 2>&1 | grep -qi mikefarah; then
+    echo "ERROR: mikefarah/yq (Go yq v4+) is required; the python 'yq' is not compatible. Install: scoop install yq (Windows), brew install yq (macOS), or see https://github.com/mikefarah/yq." >&2
+    exit 2
+  fi
 }
 require_tools
 

diff --git a/lib/co-evolution.sh b/lib/co-evolution.sh
@@ -13,7 +13,19 @@ log() {
 die() {
   local message="${1:-Fatal error}"
   log "ERROR: $message"
-  exit 1
+  # F-6: honor an optional exit-code (2nd arg); default to 1 when omitted.
+  exit "${2:-1}"
+}
+
+# F-1: reject the wrong `yq`. The Debian/Ubuntu `apt install yq` ships the python
+# yq (kislyuk), which is NOT compatible with the mikefarah/Go yq v4 syntax this
+# project uses. mikefarah prints "mikefarah" in --version; the python yq does not.
+# Sites that cannot source this lib (the PEL components, by their self-containment
+# invariants) inline the same version check -- keep them in sync.
+require_mikefarah_yq() {
+  if ! command -v yq >/dev/null 2>&1 || ! yq --version 2>&1 | grep -qi mikefarah; then
+    die "mikefarah/yq (Go yq v4+) required; the python 'yq' is not compatible. Install: scoop install yq (Windows), brew install yq (macOS), or the binary from https://github.com/mikefarah/yq."
+  fi
 }
 
 # RNPT-05: Default per-phase timeout in seconds. Override via --timeout flag
@@ -33,6 +45,12 @@ LIVE_MODE_WARNING_LOGGED=false
 : "${DEV_REVIEW_BRANCH:=}"
 : "${DEV_REVIEW_WORKTREE:=}"
 
+# F-5a: Claude model override. CLAUDE_MODEL env var or --claude-model flag wins;
+# the default preserves the prior hardcoded value so behavior is unchanged unless
+# overridden. (CODEX_MODEL stays optional/unset by default -- invoke_codex only
+# appends -c model= when it is set.)
+: "${CLAUDE_MODEL:=claude-opus-4-6}"
+
 # RNPT-02: Authoritative list of phases that require write access to the workdir.
 # Phase code MUST NOT pass a hard-coded "true"/"false" to invoke_agent; it must
 # call `phase_is_writable "<phase-name>"` instead. To add a new writable phase
@@ -362,9 +380,9 @@ invoke_claude() {
 
   if [[ -n "${WSL_DISTRO_NAME:-}" ]] && command -v cmd.exe >/dev/null 2>&1; then
     # Under WSL, reuse the Windows Claude session because WSL and Windows keep separate auth state.
-    cmd=(cmd.exe /c claude -p --output-format text --model claude-opus-4-6 "${tool_flags[@]}")
+    cmd=(cmd.exe /c claude -p --output-format text --model "${CLAUDE_MODEL}" "${tool_flags[@]}")
   else
-    cmd=(claude -p --output-format text --model claude-opus-4-6 "${tool_flags[@]}")
+    cmd=(claude -p --output-format text --model "${CLAUDE_MODEL}" "${tool_flags[@]}")
   fi
 
   "${cmd[@]}" < "$prompt_file" > "$output_file" 2>"$stderr_file" || true

diff --git a/tests/claude-model-override-simulation.sh b/tests/claude-model-override-simulation.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# tests/claude-model-override-simulation.sh
+# Hermetic gate for the Claude model override (audit finding F-5a).
+#
+# invoke_claude() previously hardcoded "--model claude-opus-4-6" in two places.
+# After the fix it must read $CLAUDE_MODEL (default claude-opus-4-6), so the
+# model is overridable via the CLAUDE_MODEL env var and the --claude-model flag
+# without changing the default. Precedence: --claude-model > CLAUDE_MODEL > default.
+#
+# Coverage:
+#   1. default model is the unchanged claude-opus-4-6
+#   2. CLAUDE_MODEL env override reaches the claude invocation
+#   3. co-evolve-bouncer.sh wires --claude-model to CLAUDE_MODEL (static grep of the script source; the flag is not run end-to-end)
+#   4. --help documents --claude-model
+#
+# Pattern: a PATH-injected claude stub records the --model value it receives.
+
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+LIB="$REPO_ROOT/lib/co-evolution.sh"
+BOUNCER="$REPO_ROOT/co-evolve-bouncer.sh"
+
+TEST_DIR="$(mktemp -d -t claude-model-XXXXXX)"
+trap 'rm -rf "$TEST_DIR"' EXIT
+
+TOTAL=0
+FAILURES=0
+pass() { printf "PASS: %s\n" "$1"; }
+fail() { printf "FAIL: %s\n" "$1" >&2; FAILURES=$((FAILURES + 1)); }
+
+# claude stub: record the value following --model, then emit a document body.
+mkdir -p "$TEST_DIR/bin"
+cat > "$TEST_DIR/bin/claude" <<'STUB'
+#!/usr/bin/env bash
+if [[ "$*" == *"--version"* ]]; then echo "claude 1.0.0 (model-stub)"; exit 0; fi
+model=""
+while [[ $# -gt 0 ]]; do
+  if [[ "$1" == "--model" ]]; then model="${2:-}"; shift 2; continue; fi
+  shift
+done
+[[ -n "${MODEL_MARKER:-}" ]] && printf '%s' "$model" > "$MODEL_MARKER"
+cat > /dev/null  # consume stdin
+echo "Stub document body with enough plain words to clear any downstream size check."
+STUB
+chmod +x "$TEST_DIR/bin/claude"
+
+# --- Scenario 1: default model is claude-opus-4-6 ---
+TOTAL=$((TOTAL + 1))
+marker="$TEST_DIR/m_default"
+(
+  unset CLAUDE_MODEL WSL_DISTRO_NAME  # WSL_DISTRO_NAME unset: skip the cmd.exe branch so the PATH stub is what runs
+  export MODEL_MARKER="$marker"
+  export PATH="$TEST_DIR/bin:$PATH"
+  source "$LIB"
+  invoke_claude /dev/null "$TEST_DIR/out1" "$TEST_DIR/err1" false
+) >/dev/null 2>&1
+got="$(cat "$marker" 2>/dev/null || true)"
+if [[ "$got" == "claude-opus-4-6" ]]; then
+  pass "default model is claude-opus-4-6 (got '$got')"
+else
+  fail "default: expected claude-opus-4-6, got '$got'"
+fi
+
+# --- Scenario 2: CLAUDE_MODEL env override reaches the invocation ---
+TOTAL=$((TOTAL + 1))
+marker="$TEST_DIR/m_env"
+(
+  unset WSL_DISTRO_NAME; export CLAUDE_MODEL="claude-test-env-xyz"  # unset first: skip the cmd.exe branch so the PATH stub is what runs
+  export MODEL_MARKER="$marker"
+  export PATH="$TEST_DIR/bin:$PATH"
+  source "$LIB"
+  invoke_claude /dev/null "$TEST_DIR/out2" "$TEST_DIR/err2" false
+) >/dev/null 2>&1
+got="$(cat "$marker" 2>/dev/null || true)"
+if [[ "$got" == "claude-test-env-xyz" ]]; then
+  pass "CLAUDE_MODEL env override honored (got '$got')"
+else
+  fail "env override: expected claude-test-env-xyz, got '$got'"
+fi
+
+# --- Scenario 3: runner wires --claude-model to CLAUDE_MODEL ---
+TOTAL=$((TOTAL + 1))
+if grep -Eq -- '--claude-model\)' "$BOUNCER" && grep -Fq 'CLAUDE_MODEL="$2"' "$BOUNCER"; then
+  pass "co-evolve-bouncer.sh wires --claude-model to CLAUDE_MODEL"
+else
+  fail "co-evolve-bouncer.sh missing --claude-model -> CLAUDE_MODEL wiring"
+fi
+
+# --- Scenario 4: --help documents --claude-model ---
+TOTAL=$((TOTAL + 1))
+help_out="$(bash "$BOUNCER" --help 2>/dev/null || true)"
+if [[ "$help_out" == *"--claude-model"* ]]; then
+  pass "--help documents --claude-model"
+else
+  fail "--help does not document --claude-model"
+fi
+
+passed=$((TOTAL - FAILURES))
+if (( FAILURES == 0 )); then
+  echo "$passed/$TOTAL scenarios passed"
+  exit 0
+else
+  echo "$passed/$TOTAL scenarios passed ($FAILURES failed)" >&2
+  exit 1
+fi
diff --git a/tests/die-exit-code-simulation.sh b/tests/die-exit-code-simulation.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# tests/die-exit-code-simulation.sh
+# Hermetic unit gate for die() exit-code propagation (audit finding F-6).
+#
+# die() historically ignored its optional second argument and always exited 1,
+# silently collapsing ~50 call sites that pass meaningful codes (2,3,5,6,8,9,10)
+# down to 1. After the fix die() must `exit "${2:-1}"`: honor an explicit code,
+# default to 1 when none is given. Both definitions are covered — the canonical
+# one in lib/co-evolution.sh and the guarded fallback in
+# evals/lib/co-evolution-evals.sh — so the two cannot drift.
+#
+# Pattern: each case runs in a pristine `bash -c` that sources one lib and calls
+# die, so no function definition leaks between cases and the
+# sourced-in-production path is exercised faithfully.
+
+set -uo pipefail  # NOT -e: cases capture their own exit codes
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+LIB="$REPO_ROOT/lib/co-evolution.sh"
+EVALS_LIB="$REPO_ROOT/evals/lib/co-evolution-evals.sh"
+
+TOTAL=0
+FAILURES=0
+
+pass() { printf "PASS: %s\n" "$1"; }
+fail() { printf "FAIL: %s\n" "$1" >&2; FAILURES=$((FAILURES + 1)); }
+
+# run_die <lib-path> <die-call> -> prints the exit code die produced (lib path is passed via the environment, not spliced into the script text).
+run_die() {
+  local lib="$1" call="$2"
+  DIE_TEST_LIB="$lib" bash -c "source \"\$DIE_TEST_LIB\"; $call" >/dev/null 2>&1
+  printf '%s' "$?"
+}
+
+# check <description> <lib-path> <die-call> <expected-code>
+check() {
+  TOTAL=$((TOTAL + 1))
+  local got
+  got="$(run_die "$2" "$3")"
+  if [[ "$got" == "$4" ]]; then
+    pass "$1 (exit $got)"
+  else
+    fail "$1: expected exit $4, got $got"
+  fi
+}
+
+check "lib die: no code defaults to 1"   "$LIB"       'die "boom"'    1
+check "lib die: explicit 2 honored"      "$LIB"       'die "boom" 2'  2
+check "lib die: explicit 5 honored"      "$LIB"       'die "boom" 5'  5
+check "lib die: explicit 10 honored"     "$LIB"       'die "boom" 10' 10
+check "evals die: no code defaults to 1" "$EVALS_LIB" 'die "boom"'    1
+check "evals die: explicit 3 honored"    "$EVALS_LIB" 'die "boom" 3'  3
+
+# die() must still log the message before exiting (fix preserves logging).
+TOTAL=$((TOTAL + 1))
+msg_out="$(DIE_TEST_LIB="$LIB" bash -c 'source "$DIE_TEST_LIB"; die "distinct-marker-xyz" 2' 2>&1 || true)"
+if [[ "$msg_out" == *"ERROR: distinct-marker-xyz"* ]]; then
+  pass "lib die: logs ERROR message before exit"
+else
+  fail "lib die: expected ERROR message in output, got: $msg_out"
+fi
+
+passed=$((TOTAL - FAILURES))
+if (( FAILURES == 0 )); then
+  echo "$passed/$TOTAL scenarios passed"
+  exit 0
+else
+  echo "$passed/$TOTAL scenarios passed ($FAILURES failed)" >&2
+  exit 1
+fi