From e2918e0b65648e30119c2d60fad4ba4fe7d31d63 Mon Sep 17 00:00:00 2001 From: Svetlana Perekrestova <32782746+SPerekrestova@users.noreply.github.com> Date: Sat, 23 May 2026 00:05:25 +0200 Subject: [PATCH] Fix benchmark manifest git SHA --- .github/workflows/tier1-benchmark.yml | 4 ++-- eval/run_benchmark.py | 3 +++ tests/eval/test_run_benchmark.py | 7 +++++++ tests/test_tier1_benchmark_workflow.py | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tier1-benchmark.yml b/.github/workflows/tier1-benchmark.yml index b04460d..c5ae896 100644 --- a/.github/workflows/tier1-benchmark.yml +++ b/.github/workflows/tier1-benchmark.yml @@ -223,7 +223,7 @@ jobs: --memory=4Gi \ --cpu=2 \ --service-account="$SERVICE_ACCOUNT" \ - --set-env-vars="INTERACTION_DB_REPO=${INTERACTION_DB_REPO},INTERACTION_DB_TAG=${INTERACTION_DB_TAG},INTERACTION_DB_SHA256=${INTERACTION_DB_SHA256},BENCHMARK_FORCE_OS_EXIT=1" \ + --set-env-vars="INTERACTION_DB_REPO=${INTERACTION_DB_REPO},INTERACTION_DB_TAG=${INTERACTION_DB_TAG},INTERACTION_DB_SHA256=${INTERACTION_DB_SHA256},BENCHMARK_FORCE_OS_EXIT=1,BENCHMARK_GIT_COMMIT=${GITHUB_SHA}" \ --set-secrets="HF_TOKEN=HF_TOKEN:latest" \ --args="$ARGS" @@ -285,7 +285,7 @@ jobs: "run_id": manifest.get("run_id") == os.environ["RUN_ID"], "dataset_revision": manifest.get("dataset_revision") == os.environ["DATASET_REVISION"], "sample_size": 0 < sample_size <= expected_limit, - "git_commit": bool(manifest.get("git_commit")), + "git_commit": manifest.get("git_commit") not in ("", "0000000", None), "model_ids": bool(manifest.get("model_ids", {}).get("ner")), "ddinter_repo": manifest.get("ddinter_db", {}).get("repo") == os.environ["INTERACTION_DB_REPO"].strip("\r\n"), "ddinter_tag": manifest.get("ddinter_db", {}).get("tag") == os.environ["INTERACTION_DB_TAG"].strip("\r\n"), diff --git a/eval/run_benchmark.py b/eval/run_benchmark.py index 509279c..19b2bfb 100644 --- a/eval/run_benchmark.py +++ b/eval/run_benchmark.py @@ -787,6 +787,9 @@ def ddinter_metadata_from_args(args: argparse.Namespace) -> dict: def _git_commit() -> str: + env_commit = os.environ.get("BENCHMARK_GIT_COMMIT") or os.environ.get("GITHUB_SHA") + if env_commit: + return env_commit try: completed = subprocess.run( ["git", "rev-parse", "HEAD"], diff --git a/tests/eval/test_run_benchmark.py b/tests/eval/test_run_benchmark.py index 44abaed..90dfc2e 100644 --- a/tests/eval/test_run_benchmark.py +++ b/tests/eval/test_run_benchmark.py @@ -366,6 +366,13 @@ def test_manifest_includes_ddinter_release_metadata(): assert manifest["concurrency"] == 8 +def test_git_commit_uses_benchmark_env_when_git_metadata_missing(monkeypatch): + monkeypatch.setenv("BENCHMARK_GIT_COMMIT", "e9f24645baf2d64a0504ee5b03c7af9f767b74cb") + monkeypatch.setattr(run_benchmark.subprocess, "run", lambda *args, **kwargs: (_ for _ in ()).throw(RuntimeError)) + + assert run_benchmark._git_commit() == "e9f24645baf2d64a0504ee5b03c7af9f767b74cb" + + @pytest.mark.asyncio async def test_ensure_ddinter_db_downloads_from_github_release(monkeypatch, tmp_path: Path): checks = iter([False, True]) diff --git a/tests/test_tier1_benchmark_workflow.py b/tests/test_tier1_benchmark_workflow.py index dfd1552..5c9e46a 100644 --- a/tests/test_tier1_benchmark_workflow.py +++ b/tests/test_tier1_benchmark_workflow.py @@ -29,6 +29,7 @@ def test_tier1_benchmark_workflow_contract(): assert "INTERACTION_DB_TAG" in workflow assert "INTERACTION_DB_SHA256" in workflow assert "BENCHMARK_FORCE_OS_EXIT=1" in workflow + assert "BENCHMARK_GIT_COMMIT=${GITHUB_SHA}" in workflow assert 'tr -d "\\r\\n"' in workflow assert 'INTERACTION_DB_REPO="$(printf' in workflow assert 'INTERACTION_DB_TAG="$(printf' in workflow @@ -36,6 +37,7 @@ def test_tier1_benchmark_workflow_contract(): assert "hf://buckets/SPerva/pillchecker-experiments" in workflow assert 'bucket = os.environ["BENCHMARK_BUCKET"]' in workflow assert "0 < sample_size <= expected_limit" in workflow + assert 'manifest.get("git_commit") not in ("", "0000000", None)' in workflow assert "len(predictions) == sample_size" in workflow for artifact in ("manifest.json", "results.json", "predictions.jsonl", "errors.jsonl", "summary.md"):