Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions .github/workflows/prepare_submission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
name: Prepare Submission for Merge

# Security model: `pull_request_target` executes in the context of the BASE
# repository, so secrets and a write-capable token are available even when the
# PR originates from a fork. The design intent is that nothing from the fork is
# ever executed: only scripts from this repository run, and they operate on the
# sanitized model_card.yaml content.
on:
  pull_request_target:
    types:
      - labeled
    branches:
      - main
    paths:
      - "submissions/**"

jobs:
  prepare:
    # Gate: the job only proceeds for the 'ready-to-merge' label, which a
    # maintainer applies after human review + approval.
    if: ${{ github.event.label.name == 'ready-to-merge' }}
    runs-on: ubuntu-latest
    # Write scopes are needed to push the cleaned branch and to open, comment
    # on, and close pull requests.
    permissions:
      pull-requests: write
      contents: write

    steps:
# Check out `main` of the base repository with full history; the scripts run by
# later steps come from this checkout, not from the PR.
- name: Checkout base repo (our scripts)
  uses: actions/checkout@v4
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    fetch-depth: 0
    ref: main

- name: Fetch fork PR head (read-only, no code execution)
  env:
    # Passed through the environment instead of being templated into the script.
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    # Bring the PR head into a local branch, then copy ONLY its submissions/
    # tree into the working directory; everything else stays at `main`.
    git fetch origin "pull/${PR_NUMBER}/head:pr-head"
    git checkout pr-head -- submissions/

- name: Strip source code — keep only model_card.yaml per method dir
  run: |
    # Everything under submissions/ was copied from the PR head and is
    # untrusted. For each method directory (except the template) keep exactly
    # one thing: a regular top-level file named model_card.yaml.
    for method_dir in submissions/*/; do
      # An unmatched glob is left as a literal pattern; skip non-directories.
      [ -d "$method_dir" ] || continue

      # A symlinked method directory would make the cleanup operate on the
      # link target (possibly outside submissions/), so refuse it outright.
      if [ -L "${method_dir%/}" ]; then
        echo "ERROR: $method_dir is a symlink; refusing to process it."
        exit 1
      fi

      method=$(basename "$method_dir")
      if [ "$method" = "template" ]; then
        continue
      fi

      echo "Cleaning $method_dir ..."
      # Only inspect top-level entries and remove each one recursively unless
      # it is a regular file called model_card.yaml. `-type f` does not match
      # symlinks or directories, so a symlink/directory named model_card.yaml
      # is removed, and nested model_card.yaml files disappear together with
      # their parent directories instead of blocking the cleanup.
      find "$method_dir" -mindepth 1 -maxdepth 1 \
        ! \( -type f -name "model_card.yaml" \) \
        -exec rm -rf -- {} +
    done

- name: Validate model cards present
  run: |
    # Every method directory other than the template must contain a
    # model_card.yaml. All offenders are reported before the step fails.
    any_missing=0
    for dir in submissions/*/; do
      name=$(basename "$dir")
      if [ "$name" = "template" ]; then
        continue
      fi
      if [ -f "${dir}model_card.yaml" ]; then
        continue
      fi
      echo "ERROR: missing model_card.yaml in $dir"
      any_missing=1
    done

    if [ "$any_missing" = "0" ]; then
      echo "All model_card.yaml files present."
    else
      exit 1
    fi

- name: Set up Python
  uses: actions/setup-python@v5
  with:
    # Quoted so YAML keeps the version a string rather than a float.
    python-version: '3.11'

- name: Install dependencies
  run: |
    # Upgrade pip first, then install the two packages this job installs.
    python -m pip install --upgrade pip
    python -m pip install pandas pyyaml

# Runs the generator script from the `main` checkout; only submissions/ was
# taken from the PR head.
- name: Regenerate results CSVs from model cards
  run: |
    python scripts/leaderboard/generate_results_csvs.py

- name: Push clean branch to base repo
  id: push
  env:
    PR_NUMBER: ${{ github.event.pull_request.number }}
  run: |
    # Commit the cleaned submissions/ tree plus regenerated CSVs on a
    # per-PR branch and publish the branch name as the `branch` step output.
    target_branch="submissions/prepare-${PR_NUMBER}"

    git config user.email "github-actions[bot]@users.noreply.github.com"
    git config user.name "github-actions[bot]"

    git checkout -b "$target_branch"
    git add submissions/ results/*.csv

    # `git diff --cached --quiet` exits non-zero when something is staged.
    if ! git diff --cached --quiet; then
      git commit -m "leaderboard: strip submission src code and regenerate CSVs (#${PR_NUMBER}) [skip ci]"
    else
      echo "Nothing changed after cleanup."
    fi

    # Force-push so a re-run replaces any earlier prepared branch.
    git push origin "$target_branch" --force
    echo "branch=$target_branch" >> $GITHUB_OUTPUT

- name: Create clean PR and comment on original
  uses: actions/github-script@v7
  env:
    # Branch name published by the `push` step.
    CLEAN_BRANCH: ${{ steps.push.outputs.branch }}
  with:
    script: |
      // Opens (or reuses) a PR from the cleaned branch into the original PR's
      // base branch, then links it from a comment on the original PR.
      const { owner, repo } = context.repo;
      const branch = process.env.CLEAN_BRANCH;
      const origPR = context.payload.pull_request.number;
      const origTitle = context.payload.pull_request.title;
      const base = context.payload.pull_request.base.ref;

      // The branch is force-pushed, so this workflow may run more than once
      // for the same PR. Creating a second PR for the same head/base is
      // rejected by the API, so reuse an open one when it already exists.
      const { data: openPRs } = await github.rest.pulls.list({
        owner,
        repo,
        state: 'open',
        head: `${owner}:${branch}`,
        base,
      });

      let cleanPR;
      if (openPRs.length > 0) {
        cleanPR = openPRs[0];
        core.info(`Reusing existing prepared PR #${cleanPR.number}`);
      } else {
        const created = await github.rest.pulls.create({
          owner,
          repo,
          title: `[prepared] ${origTitle}`,
          head: branch,
          base,
          body: `Automated clean branch from #${origPR}. Source code stripped, CSVs regenerated.`,
        });
        cleanPR = created.data;
      }

      const body = [
        '## Submission prepared for merge',
        '',
        `A clean branch \`${branch}\` has been created with:`,
        '- Source code removed (only `model_card.yaml` retained)',
        '- `results/*.csv` regenerated from all model cards',
        '',
        `**Maintainer:** review and merge the prepared PR: ${cleanPR.html_url}`,
      ].join('\n');

      await github.rest.issues.createComment({
        owner,
        repo,
        issue_number: origPR,
        body,
      });

- name: Close original fork PR
  uses: actions/github-script@v7
  with:
    script: |
      // Close the PR that triggered this workflow.
      const { owner, repo } = context.repo;
      const pull_number = context.payload.pull_request.number;
      await github.rest.pulls.update({ owner, repo, pull_number, state: 'closed' });
176 changes: 176 additions & 0 deletions .github/workflows/review_submission.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
name: Review Submission

# Runs for PRs against `main` that touch submissions/, including when a label
# is added to the PR.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    branches:
      - main
    paths:
      - "submissions/**"

jobs:
  review:
    # Skipped unless the PR carries the 'submission' label.
    if: ${{ contains(github.event.pull_request.labels.*.name, 'submission') }}
    runs-on: ubuntu-latest
    # Read the repository contents; write is requested only for PR comments.
    permissions:
      contents: read
      pull-requests: write

    steps:
# Full history is fetched; a later step diffs HEAD against the base branch.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    submodules: false
    fetch-depth: 0

- name: Set up Python
  uses: actions/setup-python@v5
  with:
    # Quoted so YAML keeps the version a string rather than a float.
    python-version: '3.11'

- name: Install dependencies
  run: |
    # Upgrade pip, install this project in editable mode with its `dev`
    # extra, then the additional packages this job installs.
    python -m pip install --upgrade pip
    python -m pip install -e ".[dev]"
    python -m pip install anthropic pyyaml pandas

- name: Detect changed submission folders
  id: detect
  env:
    BASE_REF: ${{ github.base_ref }}
  run: |
    # Collect the distinct submissions/<dir> folders touched by this PR
    # (three-dot diff against the base branch), leaving out the template.
    # `|| true` keeps the step alive when the pipeline reports no matches.
    git diff --name-only "origin/${BASE_REF}...HEAD" \
      | grep -E '^submissions/[^/]+/' \
      | sed 's|\(submissions/[^/]*\)/.*|\1|' \
      | sort -u \
      | grep -v '^submissions/template$' \
      > changed_submissions.txt || true

    echo "Changed submission dirs:"
    cat changed_submissions.txt

    # Publish whether the list is non-empty as the `found` step output.
    if [ -s changed_submissions.txt ]; then
      found=true
    else
      found=false
    fi
    echo "found=$found" >> $GITHUB_OUTPUT

- name: Run review for each changed submission
  if: steps.detect.outputs.found == 'true'
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  run: |
    # Runs the review script once per changed submission directory and
    # records an overall status in review_outputs/exit_code.txt. The step
    # itself never fails on a review failure; later steps read that file.
    mkdir -p review_outputs
    EXIT_CODE=0

    while IFS= read -r sub_dir; do
      echo "Reviewing: $sub_dir"
      method_slug=$(basename "$sub_dir")

      # Capture this run's status in its own variable. Reading `$?` after a
      # `cmd || VAR=$?` list always yields 0, which would make the log line
      # below report success even for a failed review.
      rc=0
      python scripts/leaderboard/review_submission.py \
        --submission "$sub_dir" \
        --output "review_outputs/${method_slug}_report.json" \
        --markdown "review_outputs/${method_slug}_summary.md" \
        || rc=$?

      echo "Exit code for $sub_dir: $rc"

      # Remember the most recent non-zero status; a later success must not
      # reset it.
      if [ "$rc" -ne 0 ]; then
        EXIT_CODE=$rc
      fi
    done < changed_submissions.txt

    # Store overall exit code for the comment step
    echo "$EXIT_CODE" > review_outputs/exit_code.txt

- name: Post review comment on PR
  if: steps.detect.outputs.found == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      // Builds one combined Markdown report from review_outputs/ and posts it
      // on the PR, updating the previous bot comment when one exists.
      const fs = require('fs');
      const path = require('path');

      const { owner, repo } = context.repo;
      const issue_number = context.issue.number;
      const outputDir = 'review_outputs';
      // Heading text doubles as the marker used to find our earlier comment.
      const marker = 'MassSpecGym Automated Submission Review';
      let combinedBody = `## ${marker}\n\n`;

      // Overall status written by the review step; a missing file means 0.
      const exitCodeFile = path.join(outputDir, 'exit_code.txt');
      const exitCode = fs.existsSync(exitCodeFile)
        ? parseInt(fs.readFileSync(exitCodeFile, 'utf8').trim(), 10)
        : 0;

      // An unparsable value yields NaN, which is treated as a failure here.
      if (exitCode !== 0) {
        combinedBody += '> **BLOCKED: One or more hard failures detected. This PR cannot be merged until they are resolved.**\n\n';
      } else {
        combinedBody += '> No hard failures. Maintainer review required for any warnings.\n\n';
      }

      const mdFiles = fs.readdirSync(outputDir).filter(f => f.endsWith('_summary.md'));
      if (mdFiles.length === 0) {
        combinedBody += '_No submission directories detected in changed files._\n';
      }
      for (const f of mdFiles) {
        combinedBody += fs.readFileSync(path.join(outputDir, f), 'utf8') + '\n\n---\n\n';
      }

      combinedBody += '_Generated by [review_submission.py](scripts/leaderboard/review_submission.py). ';
      combinedBody += 'See [SUBMISSION_GUIDE.md](submissions/SUBMISSION_GUIDE.md) for requirements._';

      // Find and update existing bot comment, or create new one.
      // Paginate: a single listComments call returns only the first page, so
      // on long threads the earlier bot comment would be missed and a
      // duplicate posted on every run.
      const comments = await github.paginate(github.rest.issues.listComments, {
        owner,
        repo,
        issue_number,
        per_page: 100,
      });

      // `user` and `body` can be absent on some comments; guard both.
      const botComment = comments.find(c =>
        c.user?.type === 'Bot' &&
        (c.body ?? '').includes(marker)
      );

      if (botComment) {
        await github.rest.issues.updateComment({
          owner,
          repo,
          comment_id: botComment.id,
          body: combinedBody,
        });
      } else {
        await github.rest.issues.createComment({
          owner,
          repo,
          issue_number,
          body: combinedBody,
        });
      }

- name: Fail CI on hard failures
  if: steps.detect.outputs.found == 'true'
  run: |
    # Read the status recorded by the review step; a missing file counts as 0.
    status=$(cat review_outputs/exit_code.txt 2>/dev/null || echo 0)
    if [ "$status" = "0" ]; then
      exit 0
    fi
    echo "Hard failures detected in submission review. See PR comment for details."
    exit 1

- name: No submission changes detected
  if: steps.detect.outputs.found == 'false'
  run: |
    # Informational only: print guidance, one argument per output line.
    printf '%s\n' \
      "No changes in submissions/ directories detected. Skipping submission review." \
      "If you added results to results/*.csv without a model card, please also add" \
      "submissions/<method_name>/model_card.yaml (see submissions/SUBMISSION_GUIDE.md)."

# Separate job that records the human-approval requirement in CI output.
# NOTE: this job is NOT unconditional. The `if:` below skips it when the PR
# lacks the 'submission' label, and `needs: review` makes it depend on the
# review job (GitHub skips a dependent job when a needed job fails or is
# skipped, unless the condition opts out of that).
require-human-approval:
runs-on: ubuntu-latest
if: contains(github.event.pull_request.labels.*.name, 'submission')
needs: review
# The approval itself is enforced via branch protection rules
# (Settings → Branches → main → Require approvals: 1), not by this job.
# The step below only prints a reminder so the requirement is visible in the
# CI log.
steps:
- name: Human sign-off required
run: |
echo "HUMAN REVIEW REQUIRED"
echo "The automated review has completed. A maintainer"
echo "must read the review report and approve this PR"
echo "before it can be merged."
echo ""
echo "Maintainers: see skills/review/SKILL.md for the"
echo "human review checklist."
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,8 @@ dmypy.json

# W&B
wandb/

# External codebases
external/
# NOTE(review): presumably local Claude tooling state, not an external codebase — confirm
.claude/
# NOTE(review): presumably model checkpoint artifacts — confirm
checkpoints/
Loading