Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3878fd2
chore(ci-gate): add ORCHESTRATE implementation plan
Jun 14, 2026
07041f7
ci(test): add non-blocking full-suite job (Phase 1 measure)
Jun 14, 2026
f8c7bad
docs(ci-gate): record Phase 1 CI ground truth (51/14/0)
Jun 14, 2026
9c5b0bd
ci(tmp): diagnostic job for 3 pure-zsh CI-only failures
Jun 14, 2026
54e61ff
fix(loader): keep tm dispatcher when a tm binary is on PATH
Jun 14, 2026
401aea0
ci(tmp): diagnose why tm still dropped on runner despite fix
Jun 14, 2026
f992d0a
revert(loader): undo wrong tm-shadow fix; record real cause (ait absent)
Jun 14, 2026
d490923
docs(ci-gate): correct triage record — 14 failures are one tool-absen…
Jun 14, 2026
599abcd
test(ci-gate): deterministic tm/aiterm skip (3 suites) + run-all rc-77
Jun 14, 2026
81f8c72
fix(cache): zsh-compatible flock fd allocation (Linux-only doctor bug)
Jun 14, 2026
706be60
test(ci-gate): deterministic tool-absent skips (cc/em/teach-deploy/te…
Jun 14, 2026
2aca392
ci(tmp): diagnose 3 suites that fail on CI but pass locally
Jun 14, 2026
3cb3909
test(ci-gate): determinism for atlas-PRESENT skew (local-only failures)
Jun 14, 2026
3af3607
fix(em-cache): portable file mtime (stat -f is macOS-only) + atlas-br…
Jun 14, 2026
26db341
ci(tmp): re-point diagnostic at teach-deploy + env fingerprint
Jun 14, 2026
cedb344
ci: provision git identity for full-suite + diagnostic jobs
Jun 14, 2026
f80e79c
fix(teaching-utils): portable date parsing (date -j is macOS-only)
Jun 14, 2026
c10b7d0
fix(cache): GNU-first stat order — BSD-first corrupts mtime on Linux
Jun 14, 2026
54a4be3
fix(teach-deploy): portable teaching_week date; remove temp diagnosti…
Jun 14, 2026
4a70cf0
docs(ci-gate): record Phase 2 completion (suite green on runner)
Jun 14, 2026
8190c79
docs(testing): document CI full-suite gate + rc-77 skip convention
Jun 14, 2026
aea140b
docs(changelog): record Phase 2 cross-platform bug fixes + gate
Jun 14, 2026
816f0b0
refactor(ci-gate): address PR #465 review nits
Jun 14, 2026
92dbf76
chore(ci-gate): remove ORCHESTRATE working artifact before dev merge
Jun 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,86 @@ jobs:
echo "| Metric | Value |" >> $GITHUB_STEP_SUMMARY
echo "|--------|-------|" >> $GITHUB_STEP_SUMMARY
echo "| Duration | ${DURATION}s |" >> $GITHUB_STEP_SUMMARY

# ---------------------------------------------------------------------------
# ci-full-suite-gate (Phase 1+2): run the full 65-suite run-all.sh in CI.
# Phase 1 measured the ground truth; Phase 2 made it green (service/tool-absent
# suites clean-skip via rc 77). Still NON-BLOCKING (continue-on-error) for a
# dev soak — Phase 3 promotes it to a required check (dev → main protection).
# Do NOT add to required checks while this comment is here.
# ---------------------------------------------------------------------------
full-suite:
  name: Full Test Suite (non-blocking)
  runs-on: ubuntu-latest
  continue-on-error: true

  steps:
    - name: Checkout code
      uses: actions/checkout@v6

    - name: Ensure zsh is installed
      run: |
        if ! command -v zsh >/dev/null 2>&1; then
          sudo apt-get update && sudo apt-get install -y zsh
        fi

    - name: Configure git identity
      run: |
        # Fresh runners have no git identity; suites that exercise the deploy
        # workflow (teach-deploy) run `git commit` and fail with "empty ident"
        # without it. This is CI environment provisioning, not a test change.
        git config --global user.email "ci@flow-cli.test"
        git config --global user.name "flow-cli CI"
        git config --global init.defaultBranch main

    - name: Record start time
      id: start
      # Quoted like every other $GITHUB_OUTPUT / $GITHUB_STEP_SUMMARY write in
      # this job, so the redirect target is never word-split.
      run: echo "time=$(date +%s)" >> "$GITHUB_OUTPUT"

    - name: Create mock project structure
      run: |
        # Empty `.git` directories make each path look like a repository; the
        # checkout is then copied into the flow-cli slot the suite runs from.
        mkdir -p ~/projects/dev-tools/flow-cli/.git
        mkdir -p ~/projects/r-packages/active/mediationverse/.git
        mkdir -p ~/projects/r-packages/stable/rmediation/.git
        mkdir -p ~/projects/teaching/stat-440/.git
        mkdir -p ~/projects/research/mediation-planning/.git
        mkdir -p ~/projects/quarto/manuscripts/paper1/.git
        mkdir -p ~/projects/apps/examify/.git
        cp -r . ~/projects/dev-tools/flow-cli/

    - name: Run full suite (non-blocking)
      id: fullsuite
      run: |
        cd ~/projects/dev-tools/flow-cli
        set +e
        # Capture run-all.sh output; tee returns 0, so grab run-all's real
        # exit via PIPESTATUS (1=FAIL, 2=TIMEOUT, 0=clean) and re-exit with it
        # so the job color reflects reality. continue-on-error keeps it from
        # blocking the PR.
        ./tests/run-all.sh 2>&1 | tee /tmp/full-suite.log
        rc=${PIPESTATUS[0]}
        echo "rc=$rc" >> "$GITHUB_OUTPUT"
        echo "Full suite exit code: $rc"
        exit "$rc"

    - name: Full Suite Summary
      if: always()
      # Step outputs are passed through env rather than spliced into the
      # script text. This step runs even when an earlier step failed, so
      # either output may be empty; an empty value spliced into $(( ... ))
      # would be a bash syntax error and the summary would be lost.
      env:
        START_TIME: ${{ steps.start.outputs.time }}
        SUITE_RC: ${{ steps.fullsuite.outputs.rc }}
      run: |
        END_TIME=$(date +%s)
        # No recorded start time -> report a 0s duration instead of failing.
        DURATION=$((END_TIME - ${START_TIME:-$END_TIME}))
        {
          echo "## 🧪 Full Suite (run-all.sh) — non-blocking measurement"
          echo ""
          echo "| Metric | Value |"
          echo "|--------|-------|"
          echo "| Duration | ${DURATION}s |"
          echo "| Exit code | \`${SUITE_RC:-n/a}\` (0=clean, 1=FAIL, 2=TIMEOUT) |"
          echo ""
          echo "<details><summary>Full run-all.sh output</summary>"
          echo ""
          echo '```'
          cat /tmp/full-suite.log 2>/dev/null || echo "(no log captured)"
          echo '```'
          echo ""
          echo "</details>"
        } >> "$GITHUB_STEP_SUMMARY"
31 changes: 31 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- **Cache locking errored on Linux** (`lib/doctor-cache.zsh`,
`lib/analysis-cache.zsh`): the `flock` path used bash-only high-fd
redirection (`exec 201>`/`exec 200>`), which zsh parses as a command and
fails with "command not found" on Linux (where `flock` exists). Switched to
zsh's dynamic `exec {var}>` allocation. macOS was unaffected (no `flock` →
mkdir fallback), so this only ever broke on Linux/CI.
- **Email cache never worked on Linux** (`lib/em-cache.zsh`): used macOS-only
`stat -f %m`, so every entry's mtime read as 0 and looked expired. Added a
portable `_em_cache_mtime` (GNU `stat -c %Y` first — it fails cleanly on
macOS — then BSD `stat -f %m`).
- **`teaching_week` computed 0 on Linux** (`lib/teaching-utils.zsh`,
`lib/dispatchers/teach-deploy-enhanced.zsh`): used macOS-only `date -j -f`.
Added portable date helpers (BSD `date -j -f` first, then GNU `date -d`).
- **`flow doctor --help-check` false-flagged `tm`** on machines without aiterm
(`lib/help-compliance.zsh`): the `tm` dispatcher only loads its help when the
`ait` CLI is present, so it's now checked only when `ait` is installed.

### Changed

- **CI now runs the full test suite on every PR.** Added a `full-suite` job to
`.github/workflows/test.yml` running `./tests/run-all.sh` (all 65 suites),
parallel to the fast smoke job. It starts non-blocking
(`continue-on-error`) and is promoted to a required check after soaking green.
- **`run-all.sh` skip semantics:** exit code **77** now counts a suite as
*skipped* (not failed) — used by suites that require an external tool/service
(`atlas`, `ait`, `himalaya`, R, quarto, `claude`) absent on a hosted runner.
Service-dependent suites skip/degrade cleanly; standalone-behavior suites pin
`FLOW_ATLAS_ENABLED=no` so results are identical with or without atlas.

## [7.10.0] — 2026-06-13 — forward-looking schedule layer (`agenda` + dash UPCOMING)

### Added
Expand Down
31 changes: 31 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,37 @@ The format follows [Keep a Changelog](https://keepachangelog.com/), and this pro

## [Unreleased]

### Fixed

- **Cache locking errored on Linux** (`lib/doctor-cache.zsh`,
`lib/analysis-cache.zsh`): the `flock` path used bash-only high-fd
redirection (`exec 201>`/`exec 200>`), which zsh parses as a command and
fails with "command not found" on Linux (where `flock` exists). Switched to
zsh's dynamic `exec {var}>` allocation. macOS was unaffected (no `flock` →
mkdir fallback), so this only ever broke on Linux/CI.
- **Email cache never worked on Linux** (`lib/em-cache.zsh`): used macOS-only
`stat -f %m`, so every entry's mtime read as 0 and looked expired. Added a
portable `_em_cache_mtime` (GNU `stat -c %Y` first — it fails cleanly on
macOS — then BSD `stat -f %m`).
- **`teaching_week` computed 0 on Linux** (`lib/teaching-utils.zsh`,
`lib/dispatchers/teach-deploy-enhanced.zsh`): used macOS-only `date -j -f`.
Added portable date helpers (BSD `date -j -f` first, then GNU `date -d`).
- **`flow doctor --help-check` false-flagged `tm`** on machines without aiterm
(`lib/help-compliance.zsh`): the `tm` dispatcher only loads its help when the
`ait` CLI is present, so it's now checked only when `ait` is installed.

### Changed

- **CI now runs the full test suite on every PR.** Added a `full-suite` job to
`.github/workflows/test.yml` running `./tests/run-all.sh` (all 65 suites),
parallel to the fast smoke job. It starts non-blocking
(`continue-on-error`) and is promoted to a required check after soaking green.
- **`run-all.sh` skip semantics:** exit code **77** now counts a suite as
*skipped* (not failed) — used by suites that require an external tool/service
(`atlas`, `ait`, `himalaya`, R, quarto, `claude`) absent on a hosted runner.
Service-dependent suites skip/degrade cleanly; standalone-behavior suites pin
`FLOW_ATLAS_ENABLED=no` so results are identical with or without atlas.

## [7.10.0] — 2026-06-13 — forward-looking schedule layer (`agenda` + dash UPCOMING)

### Added
Expand Down
77 changes: 63 additions & 14 deletions docs/guides/TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ flow-cli uses a **shared test framework** (`tests/test-framework.zsh`) with comp

| Metric | Count |
|--------|-------|
| Test files | 210 |
| Test suites (run-all.sh) | 58/58 passing |
| Test files | 213 |
| Test suites (run-all.sh) | 65 total — 64 passed, 1 skipped, 0 failed |
| Test functions | 12,000+ |
| Expected timeouts | 1 (IMAP connectivity) |
| Expected skips | 1 (`e2e-em-dispatcher` — needs configured IMAP account) |
| CI | runs the full suite on every PR (green on the Ubuntu runner) |

---

Expand Down Expand Up @@ -263,7 +264,36 @@ zsh tests/test-work.zsh
./tests/run-all.sh
```

65 suites, ~12000 assertions. Expected: 64/64 pass, 1 timeout (IMAP connectivity).
65 suites, ~12000 assertions. Expected: **64 passed, 0 failed, 0 timeout, 1 skipped**.
The 1 skip is `e2e-em-dispatcher` (needs a configured IMAP account; skips cleanly
otherwise). `run-all.sh` exits **0** when there are no failures or timeouts.

#### Skip semantics (exit code 77)

A suite that requires an external tool/service which is absent must **skip
cleanly** rather than fail. Exit **77** (the automake "skip" convention) tells
`run-all.sh` to count the suite as ⏭️ skipped, not ❌ failed:

```zsh
# Whole-suite guard — put after sourcing, before the tests:
command -v yq >/dev/null 2>&1 || { echo "SKIP: yq not installed"; exit 77; }
```

For a **mixed** suite (most cases are tool-independent), gate only the
tool-dependent cases instead of skipping the whole file — e.g. include the `tm`
dispatcher in dispatcher-enumeration checks only `if command -v ait`, so the
other assertions still run. This keeps full coverage on a dev machine that has
the tool while staying green on a hosted runner that doesn't.

Tools whose absence triggers a skip on CI: `atlas`, `ait` (aiterm),
`himalaya` (IMAP), `R`/`renv`, `quarto`, `claude`. Skips are printed in the
suite output and summarised in the `run-all.sh` results line, so a skip is
always visible (never a silently-missing pass).

> **Determinism:** suites that assert flow-cli's *standalone* behavior pin
> `FLOW_ATLAS_ENABLED=no` in setup so the result can't flip based on whether
> `atlas` happens to be installed. The suite is green locally **with or without**
> atlas, and on the runner (which has neither atlas nor the other tools above).

### Dogfood Quality Check

Expand Down Expand Up @@ -302,21 +332,40 @@ test_something() {

## Continuous Integration

### GitHub Actions (`test.yml`)
### GitHub Actions (`.github/workflows/test.yml`)

Tests run automatically on push and PR to `main`/`dev`, in **two parallel jobs**:

| Job | Runs | Purpose |
|-----|------|---------|
| **ZSH Plugin Tests** (`zsh-tests`) | smoke tests (`test-flow.zsh`, `test-install.sh`) + man-page version-sync guard | fast signal; the long-standing required check |
| **Full Test Suite** (`full-suite`) | the whole `./tests/run-all.sh` (~4 min) | comprehensive gate — runs every PR |

The runner has no `atlas`, `ait`, `himalaya`, `R`, `quarto`, or `claude`, so
service-dependent suites **skip** there (see "Skip semantics" above); everything else
must pass. A git identity is provisioned in the job so deploy suites that
`git commit` work. The `full-suite` job captures the real exit code via
`PIPESTATUS` (so its colour reflects reality) and emits the full `run-all.sh`
output to the job summary.

Tests run automatically on push and PR:
> **Phasing:** `full-suite` starts as a **non-blocking** measurement job
> (`continue-on-error: true`) so it can never create a perpetually-red gate
> while the suite is being made deterministic. Once it has soaked green it is
> promoted to a **required** status check on `dev`, then `main`.

```yaml
name: ZSH Plugin Tests
on: [push, pull_request]
jobs:
test:
full-suite:
name: Full Test Suite (non-blocking)
runs-on: ubuntu-latest
continue-on-error: true # measurement phase; drop when promoting to required
steps:
- uses: actions/checkout@v4
- name: Install ZSH
run: sudo apt-get install -y zsh
- name: Run Tests
- uses: actions/checkout@v6
- name: Configure git identity
run: |
git config --global user.email "ci@flow-cli.test"
git config --global user.name "flow-cli CI"
# ... mock project structure ...
- name: Run full suite (non-blocking)
run: ./tests/run-all.sh
```

Expand Down
22 changes: 16 additions & 6 deletions lib/analysis-cache.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ if ! typeset -f _flow_log_debug >/dev/null 2>&1; then
source "${0:A:h}/core.zsh" 2>/dev/null || true
fi

# Mutable module state: the flock file descriptor allocated by `exec {var}>` in
# the acquire path and closed in the release path (a different function). Declare
# it `-g` explicitly so the cross-function reference is unambiguous rather than
# relying on zsh's implicit-global-on-assignment behaviour.
typeset -g _ANALYSIS_CACHE_LOCK_FD=""

# =============================================================================
# CONSTANTS
# =============================================================================
Expand Down Expand Up @@ -182,9 +188,12 @@ _cache_acquire_lock() {
# Create lock file if it doesn't exist
touch "$lock_path" 2>/dev/null

# Use flock with timeout
exec 200>"$lock_path"
if ! flock -w "$ANALYSIS_CACHE_LOCK_TIMEOUT" 200 2>/dev/null; then
# Use flock with timeout. zsh requires the dynamic `{var}` form for
# file descriptors >= 10; the literal `exec 200>` is bash-only and
# errors in zsh ("command not found: 200") on Linux — where this flock
# branch runs. macOS lacks flock and uses the mkdir fallback below.
exec {_ANALYSIS_CACHE_LOCK_FD}>"$lock_path"
if ! flock -w "$ANALYSIS_CACHE_LOCK_TIMEOUT" "$_ANALYSIS_CACHE_LOCK_FD" 2>/dev/null; then
_flow_log_debug "Failed to acquire cache lock (timeout)" 2>/dev/null
return 1
fi
Expand Down Expand Up @@ -238,9 +247,10 @@ _cache_release_lock() {
local lock_path
lock_path=$(_cache_get_lock_path "$course_dir")

# Release flock (if using flock)
if command -v flock >/dev/null 2>&1; then
exec 200>&- 2>/dev/null || true
# Release flock (if using flock). Close the dynamically-allocated fd from
# the acquire path (zsh {var} form; see the note there).
if command -v flock >/dev/null 2>&1 && [[ -n "$_ANALYSIS_CACHE_LOCK_FD" ]]; then
exec {_ANALYSIS_CACHE_LOCK_FD}>&- 2>/dev/null || true
fi

# Remove mkdir-based lock
Expand Down
3 changes: 2 additions & 1 deletion lib/dispatchers/teach-deploy-enhanced.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,8 @@ _deploy_update_status_file() {
start_date=$(yq '.semester_info.start_date // ""' .flow/teach-config.yml 2>/dev/null)
if [[ -n "$start_date" && "$start_date" != "null" ]]; then
local start_epoch today_epoch week_num
start_epoch=$(date -j -f "%Y-%m-%d" "$start_date" "+%s" 2>/dev/null)
# Portable: BSD `date -j -f` (macOS) then GNU `date -d` (Linux/CI).
start_epoch=$(date -j -f "%Y-%m-%d" "$start_date" "+%s" 2>/dev/null || date -d "$start_date" "+%s" 2>/dev/null)
today_epoch=$(date "+%s")
if [[ -n "$start_epoch" ]]; then
week_num=$(( (today_epoch - start_epoch) / 604800 + 1 ))
Expand Down
2 changes: 1 addition & 1 deletion lib/dispatchers/teach-dispatcher.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -4880,7 +4880,7 @@ _teach_show_status_full() {
# Find most recent backup
local recent=$(_teach_list_backups "$content_dir" | head -1)
if [[ -n "$recent" ]]; then
local backup_time=$(stat -f %m "$recent" 2>/dev/null || stat -c %Y "$recent" 2>/dev/null)
local backup_time=$(stat -c %Y "$recent" 2>/dev/null || stat -f %m "$recent" 2>/dev/null)
if [[ "$backup_time" -gt "$latest_backup_time" ]]; then
latest_backup_time=$backup_time
latest_backup=$(basename "$recent")
Expand Down
8 changes: 4 additions & 4 deletions lib/dispatchers/teach-doctor-impl.zsh
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ _teach_doctor_check_r_quick() {
# renv.lock freshness
if [[ "$verbose" == "true" && "$quiet" == "false" && "$json" == "false" ]]; then
local lock_mtime
lock_mtime=$(stat -f %m renv.lock 2>/dev/null || stat -c %Y renv.lock 2>/dev/null)
lock_mtime=$(stat -c %Y renv.lock 2>/dev/null || stat -f %m renv.lock 2>/dev/null)
if [[ -n "$lock_mtime" ]]; then
local age_days=$(( (EPOCHSECONDS - lock_mtime) / 86400 ))
if [[ $age_days -eq 0 ]]; then
Expand Down Expand Up @@ -751,7 +751,7 @@ _teach_doctor_check_r_packages() {

# Lock file freshness
local lock_mtime
lock_mtime=$(stat -f %m renv.lock 2>/dev/null || stat -c %Y renv.lock 2>/dev/null)
lock_mtime=$(stat -c %Y renv.lock 2>/dev/null || stat -f %m renv.lock 2>/dev/null)
if [[ -n "$lock_mtime" ]]; then
local age_days=$(( (EPOCHSECONDS - lock_mtime) / 86400 ))
if [[ $age_days -eq 0 ]]; then
Expand Down Expand Up @@ -1076,13 +1076,13 @@ _teach_doctor_check_macros() {
if [[ "$macros_configured" == "true" ]]; then
if [[ -f "$cache_file" ]]; then
local cache_mtime=0
cache_mtime=$(stat -f %m "$cache_file" 2>/dev/null || stat -c %Y "$cache_file" 2>/dev/null || echo 0)
cache_mtime=$(stat -c %Y "$cache_file" 2>/dev/null || stat -f %m "$cache_file" 2>/dev/null || echo 0)

local stale=0
for src in "${sources[@]}"; do
if [[ -f "$src" ]]; then
local src_mtime
src_mtime=$(stat -f %m "$src" 2>/dev/null || stat -c %Y "$src" 2>/dev/null || echo 0)
src_mtime=$(stat -c %Y "$src" 2>/dev/null || stat -f %m "$src" 2>/dev/null || echo 0)
if (( src_mtime > cache_mtime )); then
stale=1
break
Expand Down
Loading