diff --git a/agent-bouncer/agent-bouncer.sh b/agent-bouncer/agent-bouncer.sh index a062522..1f472b4 100644 --- a/agent-bouncer/agent-bouncer.sh +++ b/agent-bouncer/agent-bouncer.sh @@ -45,6 +45,15 @@ for agent in "$ODD_AGENT" "$EVEN_AGENT"; do fi done +# RNPT-05 parity: bound every bounce dispatch with invoke_agent_with_timeout so a +# network-stalled claude/codex read can't hang the run forever (the cause of the +# multi-hour zombie this guard was added for). Keeps the 3-arg +# invoke_${AGENT_NAME}(prompt, output, stderr) convention that validate_output's retry +# callback requires; closes over the per-pass global AGENT_NAME. +invoke_agent_bounded() { + invoke_agent_with_timeout "$AGENT_NAME" "$1" "$2" "$3" +} + # Generate a run name from the document content. # R-3: the LLM naming call is best-effort sugar — skip it entirely when the # codex CLI is absent, and fall back to a filename-derived label either way. @@ -59,7 +68,7 @@ $(head -20 "$PLAN_FILE")" NAME_OUTPUT_FILE=$(mktemp) NAME_STDERR_FILE=$(mktemp) printf '%s' "$NAME_PROMPT" > "$NAME_PROMPT_FILE" - invoke_codex "$NAME_PROMPT_FILE" "$NAME_OUTPUT_FILE" "$NAME_STDERR_FILE" + invoke_agent_with_timeout codex "$NAME_PROMPT_FILE" "$NAME_OUTPUT_FILE" "$NAME_STDERR_FILE" CANDIDATE=$(tr -d '\r\n ' < "$NAME_OUTPUT_FILE" | head -c 60 || true) rm -f "$NAME_PROMPT_FILE" "$NAME_OUTPUT_FILE" "$NAME_STDERR_FILE" fi @@ -156,10 +165,12 @@ $(cat "$TEMPLATE_DIR/bounce-protocol.md")" rm -f "$OUTPUT_FILE" STDERR_FILE="${RUN_DIR}/pass-${PASS}-stderr.log" RETRY_STDERR_FILE="${RUN_DIR}/pass-${PASS}-stderr-retry.log" - "invoke_${AGENT_NAME}" "$PROMPT_FILE" "$OUTPUT_FILE" "$STDERR_FILE" + invoke_agent_bounded "$PROMPT_FILE" "$OUTPUT_FILE" "$STDERR_FILE" # 8th arg (first-pass stderr) lets validate_output fail fast on # CLI-missing / auth-failure instead of accepting error text (R-1/R-2). 
- validate_output "$PLAN_FILE" "$OUTPUT_FILE" "$AGENT_NAME" "invoke_${AGENT_NAME}" "$PROMPT_FILE" "$RETRY_STDERR_FILE" 30 "$STDERR_FILE" || exit 1 + # Retry callback is the bounded shim too, so the empty/short-output retries + # inside validate_output are time-boxed as well (RNPT-05). + validate_output "$PLAN_FILE" "$OUTPUT_FILE" "$AGENT_NAME" invoke_agent_bounded "$PROMPT_FILE" "$RETRY_STDERR_FILE" 30 "$STDERR_FILE" || exit 1 cp "$OUTPUT_FILE" "${RUN_DIR}/pass-${PASS}-${ROLE}-${AGENT_NAME}-raw.md" diff --git a/co-evolve-bouncer.sh b/co-evolve-bouncer.sh index 6114b2a..ccae302 100644 --- a/co-evolve-bouncer.sh +++ b/co-evolve-bouncer.sh @@ -206,6 +206,13 @@ done TASK_AS_PATH="" [[ -n "$TASK" ]] && TASK_AS_PATH=$(normalize_path_for_bash "$TASK") +# RNPT-05 parity: compose/bounce phases now dispatch through +# invoke_agent_with_timeout, which runs the claude adapter inside a +# `timeout ... bash -c 'source lib; invoke_claude'` subprocess that re-sources +# the lib (re-applying the CLAUDE_MODEL default at line 59). Export so a +# --claude-model override survives that boundary instead of silently resetting. +export CLAUDE_MODEL + # Phase 3 LAB-01: opt-in lab routing. Dispatch BEFORE any side effects # (RUN_DIR creation, interview, compose). Byte-parity invariant (L-03): # when LAB_MODE is empty, this block is a no-op and the rest of the script @@ -453,7 +460,7 @@ ${CONTEXT_BLOCK}${INPUT_CONTENT}" log " Agent: $AGENT_A" log " Input: $INPUT_TYPE ($(echo "$INPUT_CONTENT" | wc -w | tr -d '\r\n ') words)" - invoke_agent "$AGENT_A" "$compose_prompt_file" "$compose_output_file" "$compose_stderr_file" + invoke_agent_with_timeout "$AGENT_A" "$compose_prompt_file" "$compose_output_file" "$compose_stderr_file" # R-1/R-2: fail fast on CLI-missing / auth-failure (rc 2) instead of # accepting the error text as a composed document or burning a retry. @@ -466,7 +473,7 @@ ${CONTEXT_BLOCK}${INPUT_CONTENT}" if [[ ! 
-s "$compose_output_file" ]] || (( $(wc -w < "$compose_output_file" | tr -d '\r\n ') < 10 )); then log " WARNING: compose returned empty or minimal output. Retrying once..." : > "$compose_output_file" - invoke_agent "$AGENT_A" "$compose_prompt_file" "$compose_output_file" "$compose_retry_stderr_file" + invoke_agent_with_timeout "$AGENT_A" "$compose_prompt_file" "$compose_output_file" "$compose_retry_stderr_file" compose_artifact_rc=0 validate_agent_artifact "$compose_output_file" "$compose_retry_stderr_file" "$AGENT_A" || compose_artifact_rc=$? @@ -568,7 +575,7 @@ $(cat "$PROTOCOL_TEMPLATE")" log " BOUNCE $pass/$total_passes - ${role} (${current_agent})" log "--------------------------------------------" - invoke_agent "$current_agent" "$prompt_file" "$output_file" "$stderr_file" + invoke_agent_with_timeout "$current_agent" "$prompt_file" "$output_file" "$stderr_file" # Validate output. R-1/R-2: rc 2 = CLI missing or unauthenticated — an # auth-error page must never be copied into WORKING_FILE as the document, @@ -581,7 +588,7 @@ $(cat "$PROTOCOL_TEMPLATE")" if [[ ! -s "$output_file" ]]; then log " WARNING: ${current_agent} returned empty output. Retrying..." - invoke_agent "$current_agent" "$prompt_file" "$output_file" "$stderr_file" + invoke_agent_with_timeout "$current_agent" "$prompt_file" "$output_file" "$stderr_file" bounce_artifact_rc=0 validate_agent_artifact "$output_file" "$stderr_file" "$current_agent" || bounce_artifact_rc=$? diff --git a/lib/co-evolution.sh b/lib/co-evolution.sh index 8478cf7..5f9db2c 100644 --- a/lib/co-evolution.sh +++ b/lib/co-evolution.sh @@ -1603,7 +1603,7 @@ invoke_agent_with_timeout() { "${BASH_SOURCE[0]}" "$prompt_file" "$output_file" "$stderr_file" \ || exit_code=$? ;; - opus) + claude|opus) _run_with_phase_timeout "$effective_timeout" \ bash -c 'source "$1"; invoke_claude "$2" "$3" "$4" "$5"' _ \ "${BASH_SOURCE[0]}" "$prompt_file" "$output_file" "$stderr_file" "$writable" \