Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
- name: Install dependencies
run: pip install -e ".[dev]"
- name: Ruff check
Expand All @@ -31,12 +32,44 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
- name: Install dependencies
run: pip install -e ".[dev]"
- name: Mypy
run: mypy --strict src/

# Warm the HuggingFace model cache exactly once before the test matrix.
# The benchmark tests load a real sentence-transformers model
# (all-MiniLM-L6-v2) on purpose. If each matrix leg populated the cache itself,
# a cold cache would make all three legs miss and download the model
# concurrently, tripping HF rate limiting (HTTP 429); and if every leg failed,
# the cache would never be saved. This single upstream job does at most one download
# per cold workflow run, saves the shared cache, and the matrix (which `needs`
# it) then restores a warm cache and runs fully offline.
warm-hf-cache:
  # Single upstream job that populates the shared HuggingFace model cache.
  # The `test` matrix declares `needs: warm-hf-cache`, so it only starts after
  # this job has finished (and, on a cold cache, saved the model).
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"
        cache: "pip"
    - name: Restore/Save HuggingFace cache
      id: hf-cache
      uses: actions/cache@v4
      with:
        path: ~/.cache/huggingface
        key: hf-${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2-v1
        # This job never loads the model after a cache hit, so only check that
        # the entry exists instead of downloading and unpacking it. The
        # `cache-hit` output is still set, and on a miss the post-job save
        # still uploads the freshly downloaded model.
        lookup-only: true
    - name: Download model on cache miss
      if: steps.hf-cache.outputs.cache-hit != 'true'
      # Only the minimal stack needed to fetch + load the model. This is the
      # single, serialized download for the whole workflow on a cold cache.
      run: |
        pip install "sentence-transformers>=3.0.0"
        python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"

test:
needs: warm-hf-cache
runs-on: ubuntu-latest
strategy:
matrix:
Expand All @@ -46,7 +79,24 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
# Restore the cache warmed by `warm-hf-cache`. After warming this is a hit
# on every matrix leg, so the tests run offline and make no Hub request.
- name: Restore HuggingFace models
id: hf-cache
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2-v1
- name: Install dependencies
run: pip install -e ".[dev]"
- name: Run tests
env:
# Force offline only on a cache hit (the warm-hf-cache job populates it
# first, so legs hit). A defensive '0' on a miss lets a leg self-heal
# by downloading rather than hard-failing — but the warm job makes a
# miss here unexpected.
HF_HUB_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
TRANSFORMERS_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
HF_HUB_DISABLE_TELEMETRY: "1"
run: pytest --cov --cov-fail-under=90 -q
14 changes: 14 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,21 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
# The pre-publish smoke gate loads the real all-MiniLM-L6-v2 model; cache
# the Hub store (and go offline on a cache hit) so that, once the cache is
# warm, a release is not blocked by transient HF 429 rate limiting.
- name: Cache HuggingFace models
id: hf-cache
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2-v1
- name: Pre-publish smoke gate
env:
HF_HUB_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
TRANSFORMERS_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
HF_HUB_DISABLE_TELEMETRY: "1"
run: |
pip install -e ".[embeddings-local]"
python scripts/check_smoke_gate.py
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/smoke-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,27 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"

# The smoke gate runs the synthetic benchmark, which loads the real
# all-MiniLM-L6-v2 model. Cache the Hub store so the model is not
# re-downloaded each run (avoids HF 429 rate limiting); go offline on a
# cache hit so no Hub request is made.
- name: Cache HuggingFace models
id: hf-cache
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: hf-${{ runner.os }}-sentence-transformers-all-MiniLM-L6-v2-v1

- name: Install package with embeddings extra
run: pip install -e ".[embeddings-local]"

- name: Run pre-publish smoke gate
# Same committed floors as the publish path
# (scripts/check_smoke_gate.py). A breach fails the run.
env:
HF_HUB_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
TRANSFORMERS_OFFLINE: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1' || '0' }}
HF_HUB_DISABLE_TELEMETRY: "1"
run: python scripts/check_smoke_gate.py
1 change: 1 addition & 0 deletions .github/workflows/upgrade-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Check the baseline version is published
id: baseline
Expand Down
13 changes: 8 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,16 @@ dev = [
# version range as the ``embeddings-local`` extra so both
# extras resolve together (``pip install -e .[dev,embeddings-local]``
# used to fail with a resolver conflict before this alignment).
# ``sentence-transformers`` pulls torch transitively (~500 MB) —
# the CI cache absorbs that cost after the first cold install.
# ``sentence-transformers`` pulls torch transitively (~500 MB). CI caches
# both the pip download (``cache: pip``) and the HuggingFace model store
# (``actions/cache`` on ``~/.cache/huggingface``), so this cost is paid once
# and the model-loading tests do not re-hit the Hub (avoids HF 429).
"sentence-transformers>=3.0.0",
# Install the optional sqlite-vec extension in the test environment so
# CI actually exercises the real extension-load path (the worker-thread
# load and ANN search), not just the numpy fallback. Mirrors the
# constraint declared in the ``vec`` extra above.
# CI actually exercises the real extension-load path (the worker-thread load
# and KNN search over the compact vec0 table), not just the numpy fallback.
# At the pinned 0.1.x line this is faster brute-force KNN, not ANN. Mirrors
# the constraint declared in the ``vec`` extra above.
"sqlite-vec>=0.1.0,<0.2.0",
]

Expand Down
Loading