diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b77c2ff..99cb777 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,6 +38,35 @@ jobs:
       - name: Mypy
         run: mypy --strict src/
 
+  dependency-audit:
+    name: Dependency CVE audit
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+      # Install the package NON-editable so pip-audit can resolve and audit the
+      # engrava distribution itself. An editable install (`-e`) builds an
+      # `*.editable` wheel that pip-audit reports as a SkippedDependency, and
+      # `--strict` hard-fails on ANY skip ("distribution marked as editable").
+      # A plain `pip install .` produces a normal, auditable distribution, so
+      # --strict still fails on a real CVE or collection failure — without
+      # tripping on the self-package.
+      - name: Install package
+        run: |
+          # Upgrade pip first: pip-audit --strict audits the whole environment,
+          # including pip itself, and the runner image can ship a pip with a
+          # freshly disclosed CVE (e.g. PYSEC-2026-196 in pip 26.1.1). Upgrading
+          # to the latest pip clears it for real rather than ignoring the finding.
+          python -m pip install --upgrade pip
+          pip install .
+      - name: Run pip-audit
+        run: |
+          pip install pip-audit
+          pip-audit --strict
+
   # Warm the HuggingFace model cache exactly once before the test matrix.
   # The benchmark tests load a real sentence-transformers model
   # (all-MiniLM-L6-v2) on purpose. If the matrix legs each populated the cache
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 05afd77..6f82485 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -131,5 +131,12 @@ jobs:
         run: |
           pip install build
           python -m build
+      # Gate the publish on package-data integrity: verify the freshly built
+      # wheel actually bundles the required non-Python data (schema SQL +
+      # synthetic dataset) before it can reach PyPI. Catches a packaging
+      # regression (e.g. a file falling out of [tool.setuptools.package-data])
+      # at the one point where it would otherwise ship broken.
+      - name: Verify wheel package data
+        run: python scripts/verify_wheel_data.py
       - name: Publish to PyPI (OIDC, no token)
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml
new file mode 100644
index 0000000..c33225f
--- /dev/null
+++ b/.github/workflows/secret-scan.yml
@@ -0,0 +1,53 @@
+name: Secret scanning
+
+# Generic secret detection (gitleaks, built-in rule set). Catches accidental
+# commits of API keys, tokens, .env files, private keys, etc. from any
+# contributor. Uses only gitleaks' built-in patterns — no project-specific
+# rules — so this workflow reveals nothing about internal naming or structure.
+#
+# gitleaks is run from its official release binary in a plain `run:` step rather
+# than via the gitleaks-action: this repository's Actions policy allows only
+# GitHub-owned (actions/*) and verified-publisher actions, and the gitleaks
+# action is neither. Pinning the binary by version + SHA-256 gives a reproducible
+# scan without taking a dependency on a non-allowed third-party action.
+
+on:
+  push:
+    branches: [main, dev]
+  pull_request:
+    branches: [main, dev]
+
+permissions:
+  contents: read
+
+env:
+  GITLEAKS_VERSION: "8.30.1"
+  GITLEAKS_SHA256: "551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb"
+
+jobs:
+  gitleaks:
+    name: Gitleaks scan
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          # Full history so the scan covers the whole reachable commit range,
+          # not just the tip — a secret introduced in any commit is caught.
+          fetch-depth: 0
+
+      - name: Install gitleaks (pinned binary, checksum-verified)
+        run: |
+          set -euo pipefail
+          url="https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz"
+          # --retry: ride out transient network/5xx errors instead of failing the gate.
+          curl -sSfL --retry 3 "$url" -o gitleaks.tar.gz
+          echo "${GITLEAKS_SHA256}  gitleaks.tar.gz" | sha256sum -c -
+          tar -xzf gitleaks.tar.gz gitleaks
+          chmod +x gitleaks
+          ./gitleaks version
+
+      - name: Scan repository history
+        run: |
+          set -euo pipefail
+          # `git` mode scans the commit history; exit code 1 = leaks found.
+          ./gitleaks git . --redact --verbose --exit-code 1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 254490b..8ac94e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,15 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
   always re-disabled after the load attempt, even when the load fails, so a
   connection is never left with extension loading enabled.
 
+### Changed
+
+- **CI hardening (maintenance, no runtime impact).** CI now runs a generic
+  secret scan (gitleaks, built-in rules) on pushes and pull requests targeting
+  `main` and `dev`, and a dependency vulnerability audit (`pip-audit --strict`)
+  as a job in the CI workflow; the release pipeline verifies that the built
+  wheel bundles its required package data before publishing to PyPI. These are
+  tooling/CI changes only — no change to the installed package or its behaviour.
+
 ## 0.3.0 (2026-06-02)
 
 * ci: add on-demand smoke-gate workflow (#10) ([50e2bf2](https://github.com/sovantica/engrava/commit/50e2bf2)), closes [#10](https://github.com/sovantica/engrava/issues/10)