diff --git a/.github/workflows/tier1-benchmark.yml b/.github/workflows/tier1-benchmark.yml index ddf449e..b04460d 100644 --- a/.github/workflows/tier1-benchmark.yml +++ b/.github/workflows/tier1-benchmark.yml @@ -223,7 +223,7 @@ jobs: --memory=4Gi \ --cpu=2 \ --service-account="$SERVICE_ACCOUNT" \ - --set-env-vars="INTERACTION_DB_REPO=${INTERACTION_DB_REPO},INTERACTION_DB_TAG=${INTERACTION_DB_TAG},INTERACTION_DB_SHA256=${INTERACTION_DB_SHA256}" \ + --set-env-vars="INTERACTION_DB_REPO=${INTERACTION_DB_REPO},INTERACTION_DB_TAG=${INTERACTION_DB_TAG},INTERACTION_DB_SHA256=${INTERACTION_DB_SHA256},BENCHMARK_FORCE_OS_EXIT=1" \ --set-secrets="HF_TOKEN=HF_TOKEN:latest" \ --args="$ARGS" diff --git a/eval/run_benchmark.py b/eval/run_benchmark.py index f2085a9..df6e075 100644 --- a/eval/run_benchmark.py +++ b/eval/run_benchmark.py @@ -580,5 +580,14 @@ def main() -> int: return asyncio.run(_main_async(args)) +def exit_process(exit_code: int) -> None: + """Exit normally, or force-exit in Cloud Run if upload libraries keep threads alive.""" + if os.environ.get("BENCHMARK_FORCE_OS_EXIT") == "1": + sys.stdout.flush() + sys.stderr.flush() + os._exit(exit_code) + raise SystemExit(exit_code) + + if __name__ == "__main__": - raise SystemExit(main()) + exit_process(main()) diff --git a/tests/eval/test_run_benchmark.py b/tests/eval/test_run_benchmark.py index f9efbc2..ed5d702 100644 --- a/tests/eval/test_run_benchmark.py +++ b/tests/eval/test_run_benchmark.py @@ -7,6 +7,31 @@ from eval import run_benchmark +def test_exit_process_uses_system_exit_by_default(monkeypatch): + monkeypatch.delenv("BENCHMARK_FORCE_OS_EXIT", raising=False) + + with pytest.raises(SystemExit) as exc: + run_benchmark.exit_process(3) + + assert exc.value.code == 3 + + +def test_exit_process_can_force_os_exit_for_cloud(monkeypatch): + calls = [] + + def fake_os_exit(code): + calls.append(code) + raise RuntimeError("forced exit") + + monkeypatch.setenv("BENCHMARK_FORCE_OS_EXIT", "1") + monkeypatch.setattr(run_benchmark.os, "_exit", fake_os_exit) + + with pytest.raises(RuntimeError, match="forced exit"): + run_benchmark.exit_process(0) + + assert calls == [0] + + @pytest.mark.asyncio async def test_run_benchmark_captures_trace_metrics_and_artifacts(monkeypatch, tmp_path: Path): records = [ diff --git a/tests/test_tier1_benchmark_workflow.py b/tests/test_tier1_benchmark_workflow.py index a2c31a9..dfd1552 100644 --- a/tests/test_tier1_benchmark_workflow.py +++ b/tests/test_tier1_benchmark_workflow.py @@ -28,6 +28,7 @@ def test_tier1_benchmark_workflow_contract(): assert "INTERACTION_DB_REPO" in workflow assert "INTERACTION_DB_TAG" in workflow assert "INTERACTION_DB_SHA256" in workflow + assert "BENCHMARK_FORCE_OS_EXIT=1" in workflow assert 'tr -d "\\r\\n"' in workflow assert 'INTERACTION_DB_REPO="$(printf' in workflow assert 'INTERACTION_DB_TAG="$(printf' in workflow