# intelnav/.github/workflows/ci.yml (IntelNav/intelnav, branch: main)

name: ci

# Triggers: pushes to `main`, pull requests targeting `main`, and manual
# dispatch. The dispatch form exposes one boolean input that opts in to
# the `integration-gpu` jobs; it defaults to off.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      run_gpu_matrix:
        type: boolean
        default: false
        description: 'Run integration-gpu jobs (needs self-hosted runners).'

# Least-privilege default for GITHUB_TOKEN. Every job in this workflow
# only reads: it checks out the repository and downloads release assets.
# Without this block the token inherits the repository/organization
# default, which may include write scopes.
permissions:
  contents: read

# Workflow-wide environment, applied to every job and step.
env:
  # Force colored cargo output in the job logs.
  CARGO_TERM_COLOR: always
  # Promote every rustc warning to a hard error. Quoted so the leading
  # `-` can never be misread as a YAML indicator.
  RUSTFLAGS: "-D warnings"

jobs:
  # ---------------------------------------------------------------------
  # Workspace-wide `cargo check`, Linux only. The per-OS `test` job
  # further down already covers the other platforms, so checking on
  # each of them here would buy little. The `test`, `integration` and
  # `integration-gpu` jobs all declare `needs: check`, so a workspace
  # that does not build fails fast.
  # ---------------------------------------------------------------------
  check:
    name: cargo check
    timeout-minutes: 30
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: system deps
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y build-essential pkg-config libssl-dev

      # Only prints the versions; no toolchain action is used here.
      - name: rust toolchain (respects rust-toolchain.toml)
        run: rustc --version && cargo --version

      # Keyed on the OS and the lockfile hash; falls back to the newest
      # cache for this OS when the lockfile has changed.
      - name: cache cargo registry + target
        uses: actions/cache@v4
        with:
          key: ${{ runner.os }}-cargo-check-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-check-
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target

      - name: cargo check --workspace --all-targets
        run: cargo check --workspace --all-targets

      # rustfmt and clippy come from rust-toolchain.toml, so runners
      # managed by rustup have both installed. The two steps below are
      # advisory for now (`continue-on-error`): they report findings
      # without failing pre-existing PRs. Make them fail-hard once the
      # workspace has been reviewed clean.
      - name: cargo fmt --check
        continue-on-error: true
        run: cargo fmt --all -- --check

      - name: cargo clippy --workspace --all-targets
        continue-on-error: true
        run: cargo clippy --workspace --all-targets -- -D warnings

  # ---------------------------------------------------------------------
  # Cross-OS pure-Rust tests. The chunker, stitcher, manifest, fetcher,
  # and serve-subcommand all run without libllama; if they regress on
  # Windows or macOS we need to know. Uses the synthetic-GGUF fixture
  # (crates/model-store/tests/common/mod.rs) so no model download.
  # ---------------------------------------------------------------------
  test:
    name: cargo test (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    needs: check
    timeout-minutes: 30
    strategy:
      # Let every OS finish even when one fails, so a single run shows
      # the full cross-platform picture.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - uses: actions/checkout@v4

      - name: linux system deps
        if: runner.os == 'Linux'
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y build-essential pkg-config libssl-dev

      - name: cache cargo registry + target
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-test-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-test-

      # `intelnav-ggml` dlopens libllama and `intelnav-runtime`
      # depends on it; both get their own integration job below.
      # The rest is pure Rust and runs on all three OSes here.
      - name: cargo test (pure-Rust crates, model-store with serve + p2p)
        # Explicit bash so the same script also runs on windows-latest.
        shell: bash
        run: |
          # `shell: bash` runs the script with `-e`, so without the
          # `|| status=$?` guards a failure in the first invocation
          # would end the step before the model-store tests ever run,
          # which defeats `--no-fail-fast`. Run both invocations, then
          # exit with the last non-zero status (0 when both passed).
          status=0
          cargo test --no-fail-fast \
            -p intelnav-core \
            -p intelnav-wire \
            -p intelnav-crypto \
            -p intelnav-net \
            -p intelnav-app \
            -- --test-threads=1 || status=$?
          cargo test --no-fail-fast \
            -p intelnav-model-store --features serve,p2p \
            -- --test-threads=1 || status=$?
          exit "$status"

  # ---------------------------------------------------------------------
  # Integration tests that need a real libllama and a real GGUF file.
  # Linux x86_64 only for the moment: that is what `intelnav-release.yml`
  # in the llama.cpp fork publishes today (CPU, Vulkan, ROCm). Grows as
  # macOS-arm64 Metal / CUDA / Windows builds are added to that workflow.
  # ---------------------------------------------------------------------
  integration:
    name: integration (${{ matrix.backend }})
    needs: check
    timeout-minutes: 60
    runs-on: ubuntu-latest
    # No `continue-on-error` here: when the libllama artifact has not
    # been published, the download step exports SKIP=1 and every later
    # step is gated on it, so real failures are never masked.
    strategy:
      fail-fast: false
      # ROCm is deliberately absent. Its tarball can be downloaded, but
      # the forward-pass tests need an AMD GPU at runtime and would fail
      # on a GPU-less hosted runner. ROCm is covered by the self-hosted
      # `integration-gpu` matrix instead.
      matrix:
        backend:
          - cpu
          - vulkan
    steps:
      - uses: actions/checkout@v4

      - name: system deps
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y build-essential pkg-config libssl-dev curl

      # One cache per backend, keyed on the lockfile hash.
      - name: cache cargo registry + target
        uses: actions/cache@v4
        with:
          key: ${{ runner.os }}-cargo-integration-${{ matrix.backend }}-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: ${{ runner.os }}-cargo-integration-${{ matrix.backend }}-
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target

      # Pull the most recent published libllama for this backend.
      # Pack.sh names its tarballs
      # `libllama-<os-arch-backend>-<short_sha>.tar.gz`, so the asset is
      # selected by glob pattern rather than by a hardcoded SHA. When
      # the download fails the step records SKIP=1 and exits
      # successfully; that is the expected state until the maintainer
      # cuts the first `intelnav-v*` tag on the `IntelNav/llama.cpp` fork.
      - name: download libllama artifact
        env:
          BACKEND: ${{ matrix.backend }}
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          mkdir -p "$HOME/.cache/intelnav/libllama"
          if ! gh release download --repo IntelNav/llama.cpp \
                --pattern "libllama-linux-x64-${BACKEND}-*.tar.gz" \
                --dir /tmp 2>&1; then
            echo "::warning::no libllama release for linux-x64-${BACKEND} yet"
            echo "SKIP=1" >> "$GITHUB_ENV"
            exit 0
          fi
          tar xzf /tmp/libllama-linux-x64-${BACKEND}-*.tar.gz \
            -C "$HOME/.cache/intelnav/libllama" --strip-components=1
          ls -la "$HOME/.cache/intelnav/libllama/"

      # Small real GGUF used by the stitched_load forward-equivalence
      # gates. Qwen2.5-0.5B Q4_K_M is the smallest mainstream published
      # file (~469 MB) and exercises the qwen2 architecture path.
      - name: download test model (Qwen2.5-0.5B Q4_K_M)
        if: ${{ env.SKIP != '1' }}
        run: |
          set -euo pipefail
          mkdir -p "$HOME/IntelNav/models"
          curl --fail --location --silent --show-error \
            "https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GGUF/resolve/main/qwen2.5-0.5b-instruct-q4_k_m.gguf" \
            -o "$HOME/IntelNav/models/qwen2.5-0.5b-instruct-q4_k_m.gguf"
          ls -la "$HOME/IntelNav/models"

      # Vulkan matrix entry only.
      - name: install vulkan loader (software rasterizer for headless runner)
        if: ${{ matrix.backend == 'vulkan' && env.SKIP != '1' }}
        run: |
          sudo apt-get install -y mesa-vulkan-drivers libvulkan1

      - name: cargo test ggml + runtime + stitched
        if: ${{ env.SKIP != '1' }}
        run: |
          # pack.sh lays out bin/libllama.so + include/, so LIBLLAMA_DIR
          # points at the bin subdir where the loader finds the .so.
          export INTELNAV_LIBLLAMA_DIR="$HOME/.cache/intelnav/libllama/bin"
          cargo test --no-fail-fast --lib --tests \
            -p intelnav-ggml \
            -p intelnav-runtime \
            -- --test-threads=1

  # ---------------------------------------------------------------------
  # GPU integration, parameterized on self-hosted runners. Register
  # runners carrying the labels listed under `runs_on` for a matrix
  # entry and that entry picks them up. Skipped entirely on push / PR
  # runs (see the `if:` gate below).
  # ---------------------------------------------------------------------
  integration-gpu:
    name: integration (${{ matrix.backend }}, self-hosted)
    needs: check
    # Only run on explicit workflow_dispatch with run_gpu_matrix=true.
    # Push / PR runs skip entirely — without self-hosted runners
    # registered, the jobs would otherwise sit queued forever.
    if: ${{ github.event_name == 'workflow_dispatch' && inputs.run_gpu_matrix }}
    runs-on: ${{ matrix.runs_on }}
    timeout-minutes: 90
    # Same skip-on-missing-artifact flow as `integration` above. A
    # dispatched job whose labeled runner is offline stays queued
    # waiting for it.
    strategy:
      fail-fast: false
      matrix:
        include:
          - backend: rocm
            pattern: libllama-linux-x64-rocm-*.tar.gz
            runs_on: [self-hosted, gpu, amd]
          - backend: cuda
            pattern: libllama-linux-x64-cuda-*.tar.gz
            runs_on: [self-hosted, gpu, nvidia]
          - backend: metal
            pattern: libllama-macos-arm64-metal-*.tar.gz
            runs_on: [self-hosted, macos, arm64]
    steps:
      - uses: actions/checkout@v4

      - name: download libllama artifact
        env:
          GH_TOKEN: ${{ github.token }}
          PATTERN: ${{ matrix.pattern }}
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p "$HOME/.cache/intelnav/libllama"
          # Self-hosted runners are long-lived, so /tmp can still hold
          # tarballs from an earlier run. Download into a fresh
          # directory under RUNNER_TEMP instead: a leftover file can
          # then neither make `gh release download` fail on an existing
          # file (which the branch below would misreport as a missing
          # release) nor be picked up by the extraction glob.
          dl_dir="$(mktemp -d "${RUNNER_TEMP:-/tmp}/libllama.XXXXXX")"
          if ! gh release download --repo IntelNav/llama.cpp \
                --pattern "$PATTERN" --dir "$dl_dir" 2>&1; then
            echo "::warning::no libllama release matching $PATTERN"
            echo "SKIP=1" >> "$GITHUB_ENV"
            exit 0
          fi
          # Exactly one tarball is expected to match the pattern. Fail
          # loudly otherwise rather than handing tar several archives
          # (it would treat the extra names as members to extract).
          tarballs=("$dl_dir"/*.tar.gz)
          if [ "${#tarballs[@]}" -ne 1 ] || [ ! -f "${tarballs[0]}" ]; then
            echo "::error::expected exactly one tarball matching $PATTERN, found: ${tarballs[*]}"
            exit 1
          fi
          tar xzf "${tarballs[0]}" -C "$HOME/.cache/intelnav/libllama" --strip-components=1

      - name: cargo test with GPU backend
        if: env.SKIP != '1'
        shell: bash
        env:
          INTELNAV_TEST_NGL: "-1"  # offload all layers to GPU
          # ROCm quirk: the published linux-x64-rocm tarball targets
          # `AMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100`.
          # Cards like the RX 6600 report gfx1032, which is binary-
          # compatible with gfx1030 at runtime when this override is
          # set. Harmless on non-ROCm runners (ignored). If we add
          # gfx1032 to AMDGPU_TARGETS in the release matrix this line
          # becomes redundant but not wrong.
          HSA_OVERRIDE_GFX_VERSION: "10.3.0"
        run: |
          export INTELNAV_LIBLLAMA_DIR="$HOME/.cache/intelnav/libllama/bin"
          cargo test --no-fail-fast --lib --tests \
            -p intelnav-ggml \
            -- --test-threads=1

  # ---------------------------------------------------------------------
  # Runs scripts/provision.sh with `--yes --skip-check` on a hosted
  # Ubuntu runner. Declares no `needs:`, so it does not wait for `check`.
  # NOTE(review): the meaning of the two flags is defined by the script
  # itself and is not visible from this workflow; confirm there.
  # ---------------------------------------------------------------------
  provision:
    name: provision.sh (Ubuntu)
    timeout-minutes: 30
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: run scripts/provision.sh --yes --skip-check
        run: bash scripts/provision.sh --yes --skip-check