From 2dce42052d2d7d5d043bac82905cca38ffb288aa Mon Sep 17 00:00:00 2001 From: k-yoshimi Date: Tue, 21 Apr 2026 05:33:56 +0900 Subject: [PATCH 1/3] ci: add debug workflow (signaling-NaN init + fcheck=all) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Diagnostic workflow that builds libXapi.so with gfortran runtime traps enabled and runs the tests that currently SIGABRT/SEGV in the release CI. The goal is to surface uninit-read / array-bounds violations with clean backtraces so they can be fixed one at a time. Key flags (overriding OFLAGS so everything built with $(OFLAGS) inherits them): -fbounds-check -fcheck=all -ffpe-trap=invalid,zero,overflow -fbacktrace -finit-real=snan (read-before-write -> FPE trap) -finit-integer=-8888 (obvious sentinel) -finit-logical=false -finit-derived (recursively init derived-type members) -O0 -g (no optimization, full debug symbols) Trigger model: - workflow_dispatch: manual run from the Actions tab - pull_request labeled 'debug-ci': apply the label to a PR for a diagnostic run, repeated on every later push (synchronize) while the label stays applied The pytest step sets continue-on-error=true and uploads the log as an artifact — the whole point is to capture crash backtraces, not to produce a green check. Phase 1 of the post-#135 followup plan discussed with user: diagnose -> fix -> land per-bug PRs until the release CI can run the full test suite without --deselect. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/python-tests-debug.yml | 152 +++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 .github/workflows/python-tests-debug.yml diff --git a/.github/workflows/python-tests-debug.yml b/.github/workflows/python-tests-debug.yml new file mode 100644 index 00000000..21e20d65 --- /dev/null +++ b/.github/workflows/python-tests-debug.yml @@ -0,0 +1,152 @@ +name: python-tests-debug + +# Diagnostic job: build libXapi.so with gfortran runtime traps +# (bounds check, signaling-NaN init for reals, sentinel init for +# integers/logicals, FP exception trap) and run the tests that +# currently CRASH in the release CI build. Goal: surface exact uninit / +# bounds violations with backtraces so they can be fixed one at a time. +# +# Triggers: +# - workflow_dispatch: manual run from the Actions tab +# - pull_request labeled 'debug-ci': apply the `debug-ci` label to a PR; +# runs when labeled and again on every later push (synchronize) +on: + workflow_dispatch: + pull_request: + types: [labeled, synchronize] + +jobs: + debug: + # Only run when explicitly requested via label or manual trigger. 
+ if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && + contains(github.event.pull_request.labels.*.name, 'debug-ci')) + name: pytest-debug (signaling-NaN init + bounds check) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Fortran/C build deps + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + gfortran gcc make + + - name: Clone BPSD + apply patch + run: | + git clone --depth 1 https://github.com/ats-fukuyama/bpsd.git \ + "$GITHUB_WORKSPACE/../bpsd" + if [[ -f docs/external-patches/bpsd/bpsd-species-kid-oob-fix.patch ]]; then + git -C "$GITHUB_WORKSPACE/../bpsd" apply \ + "$GITHUB_WORKSPACE/docs/external-patches/bpsd/bpsd-species-kid-oob-fix.patch" + fi + + - name: Provision mtxp/make.mtxp from nompi template + run: | + cp mtxp/make.mtxp.nompi mtxp/make.mtxp + + - name: Provision make.header — DEBUG profile (OFLAGS := DFLAGS) + # Key difference from the release CI job: OFLAGS is set to the + # strict debug flag set so everything the Makefiles compile with + # $(OFLAGS) gets the runtime traps. Adds -finit-* so a read of a + # not-yet-written real hits a signaling NaN and trips the + # -ffpe-trap=invalid FPE with a clean -fbacktrace line; integers + # (-8888) and logicals (false) only get recognizable sentinels. 
+ run: | + cat > make.header <<'HEADER_EOF' + ### CI-generated make.header (DEBUG: signaling-NaN init, fcheck=all) + LAPACK = nolapack.f + LIBLA = + MODLA95 = + MDSPLUS = nomdsplus.f + MDSLIB = + MF77 = mpif77 + MF90 = mpif90 + MF95 = mpif90 + MFC = $(MF90) + + ## linux gfortran (64bit), no graphics linkage (libXapi.so path) + GFLIBS= + OFLAGS = -g -O0 -m64 -std=legacy \ + -fbounds-check -fcheck=all \ + -ffpe-trap=invalid,zero,overflow -fbacktrace \ + -finit-real=snan -finit-integer=-8888 \ + -finit-logical=false -finit-derived + DFLAGS = $(OFLAGS) + FCFIXED = gfortran -ffixed-form + FCFREE = gfortran -ffree-form + MOD = mod + MODDIR = -Jmod + LD=ld + LDFLAGS=-r -o + FPP= + HEADER_EOF + head -20 make.header + + - name: Install Python test dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-mock pytest-subtests pytest-forked + pip install tomli + + - name: Build PIC support libraries (debug flags) + run: | + set -e + make -C "$GITHUB_WORKSPACE/../bpsd" libbpsd.a + make -C lib libtask_pic.a libgrf_pic.a libmds_pic.a + make -C mtxp libmtxnompi_pic.o libmtxbnd_pic.o + make -C tr bpsd_pic + make -C pl libpl_noeq_pic + make -C eq libeq_pic.a + make -C pl libpl_pic.a + make -C dp libdp_pic.a + make -C ob libob_pic.a + make -C open-adas/adf11/adf11-lib lib-adf11_pic.a + make -C adpost lib-adpost_pic.a + + - name: Build module shared libraries (debug) + run: | + set -e + for mod in tr fp ti wr wrx eq tot; do + echo "::group::Building $mod/lib${mod}api.so (debug)" + if [[ "$mod" != "tot" ]]; then + make -C "$mod" bpsd_pic + fi + make -C "$mod" "lib${mod}api.so" + echo "::endgroup::" + done + + - name: Run FULL pytest suite (expect crashes with backtraces) + # continue-on-error: the whole point is to SURFACE crashes that + # the release CI skips. We want the logs, not a green check. 
+ continue-on-error: true + env: + PYTHONDONTWRITEBYTECODE: "1" + PYTHONPATH: python + WRX_RUN_OK: "1" + WRX_REINIT_OK: "1" + run: | + python -m pytest python/ \ + --forked \ + --tb=short \ + -ra \ + --maxfail=50 \ + --ignore-glob='*property_boundary*' \ + --ignore-glob='*property_fanout*' \ + 2>&1 | tee pytest-debug.log + + - name: Upload diagnostic log + # Always upload so we can inspect crashes from the Actions UI. + if: always() + uses: actions/upload-artifact@v4 + with: + name: pytest-debug-log + path: pytest-debug.log + retention-days: 14 From 42f074858d0c621236b1e9910bc3befe9fdbff66 Mon Sep 17 00:00:00 2001 From: k-yoshimi Date: Tue, 21 Apr 2026 05:44:15 +0900 Subject: [PATCH 2/3] ci(debug): set -o pipefail so tee does not mask pytest exit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugbot caught that `2>&1 | tee pytest-debug.log` swallows pytest's exit code (pipeline defaults to tee's 0), making continue-on-error dead — the step always showed green in the UI even when every test crashed. `set -o pipefail` + explicit `shell: bash` fixes that so Actions displays red when pytest exited non-zero, even while the job itself continues to subsequent steps (the `continue-on-error: true` contract). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/python-tests-debug.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-tests-debug.yml b/.github/workflows/python-tests-debug.yml index 21e20d65..fc72f910 100644 --- a/.github/workflows/python-tests-debug.yml +++ b/.github/workflows/python-tests-debug.yml @@ -125,14 +125,20 @@ jobs: - name: Run FULL pytest suite (expect crashes with backtraces) # continue-on-error: the whole point is to SURFACE crashes that - # the release CI skips. We want the logs, not a green check. + # the release CI skips. 
When pytest exits non-zero, the step + # must ALSO exit non-zero so the Actions UI shows red — that + # way a fresh run is visually distinguishable from one where + # everything crashed cleanly. `set -o pipefail` ensures the + # `| tee` does not swallow pytest's exit code. continue-on-error: true env: PYTHONDONTWRITEBYTECODE: "1" PYTHONPATH: python WRX_RUN_OK: "1" WRX_REINIT_OK: "1" + shell: bash run: | + set -o pipefail python -m pytest python/ \ --forked \ --tb=short \ From 7909ca61614f8b0dc221eb537e9b74601abea00c Mon Sep 17 00:00:00 2001 From: k-yoshimi Date: Tue, 21 Apr 2026 06:26:56 +0900 Subject: [PATCH 3/3] ci(debug): drop continue-on-error so crashes turn the job red MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugbot caught that step-level `continue-on-error: true` always produces a green step status, directly contradicting the comment and the PR description. The artifact upload has its own `if: always()` guard, so removing `continue-on-error` does not lose logs on failure — it just makes the Actions UI honestly show red when pytest exits non-zero, which is the entire point of the workflow. `set -o pipefail` stays (so `| tee` does not swallow pytest's exit code before it reaches the job status). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/python-tests-debug.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/python-tests-debug.yml b/.github/workflows/python-tests-debug.yml index fc72f910..498bdb11 100644 --- a/.github/workflows/python-tests-debug.yml +++ b/.github/workflows/python-tests-debug.yml @@ -124,13 +124,13 @@ jobs: done - name: Run FULL pytest suite (expect crashes with backtraces) - # continue-on-error: the whole point is to SURFACE crashes that - # the release CI skips. 
When pytest exits non-zero, the step - # must ALSO exit non-zero so the Actions UI shows red — that - # way a fresh run is visually distinguishable from one where - # everything crashed cleanly. `set -o pipefail` ensures the - # `| tee` does not swallow pytest's exit code. - continue-on-error: true + # The whole point is to SURFACE crashes that the release CI + # skips. When pytest exits non-zero, the step MUST show red + # in the Actions UI. `set -o pipefail` ensures `| tee` does + # not swallow pytest's exit code. `continue-on-error` is NOT + # set — we rely on the next step's `if: always()` to upload + # the artifact even after this step fails, while the job + # status correctly reflects the crash. env: PYTHONDONTWRITEBYTECODE: "1" PYTHONPATH: python