From 7cfaec30df83eee47560c242ce99dddfd40f6ccc Mon Sep 17 00:00:00 2001 From: PMarzec <98286080+przemarzec@users.noreply.github.com> Date: Sun, 7 Jun 2026 00:03:01 +0200 Subject: [PATCH] ci: add secret scan + dependency audit; verify wheel data on publish (#22) * ci: add secret scan + dependency audit; verify wheel data on publish Add generic, public-safe CI hardening: - secret-scan.yml: gitleaks (official pinned binary, checksum-verified, run in a plain step since the repo Actions policy allows only GitHub-owned and verified-publisher actions) scans the full history on push and PR for accidentally committed secrets. - ci.yml: a dependency-audit job runs pip-audit --strict so a dependency with a known CVE (or any audit collection failure) fails the build. - release.yml: the publish job verifies the freshly built wheel bundles its required package data before uploading to PyPI, catching a packaging regression at the one point it would otherwise ship. Tooling/CI only; no change to the installed package or its behaviour. * ci: install package non-editable for the dependency audit pip-audit --strict reports an editable install as a SkippedDependency and hard-fails on the skip ("distribution marked as editable"). Install the package non-editable (pip install .) so pip-audit audits the engrava distribution itself; --strict still fails on a real CVE or collection error. * ci: upgrade pip before the dependency audit pip-audit --strict audits the whole environment, including pip itself. The runner image shipped pip 26.1.1, which has a known advisory (PYSEC-2026-196, fixed in 26.1.2), failing the strict audit. Upgrade pip to latest before auditing so the finding is resolved for real, not ignored. The engrava distribution and its dependencies already audit clean. 
--- .github/workflows/ci.yml | 29 +++++++++++++++++ .github/workflows/release.yml | 7 +++++ .github/workflows/secret-scan.yml | 52 +++++++++++++++++++++++++++++++ CHANGELOG.md | 9 ++++++ 4 files changed, 97 insertions(+) create mode 100644 .github/workflows/secret-scan.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b77c2ff..99cb777 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,6 +38,35 @@ jobs: - name: Mypy run: mypy --strict src/ + dependency-audit: + name: Dependency CVE audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" + # Install the package NON-editable so pip-audit can resolve and audit the + # engrava distribution itself. An editable install (`-e`) builds an + # `*.editable` wheel that pip-audit reports as a SkippedDependency, and + # `--strict` hard-fails on ANY skip ("distribution marked as editable"). + # A plain `pip install .` produces a normal, auditable distribution, so + # --strict still fails on a real CVE or collection failure — without + # tripping on the self-package. + - name: Install package + run: | + # Upgrade pip first: pip-audit --strict audits the whole environment, + # including pip itself, and the runner image can ship a pip with a + # freshly disclosed CVE (e.g. PYSEC-2026-196 in pip 26.1.1). Upgrading + # to the latest pip clears it for real rather than ignoring the finding. + python -m pip install --upgrade pip + pip install . + - name: Run pip-audit + run: | + pip install pip-audit + pip-audit --strict + # Warm the HuggingFace model cache exactly once before the test matrix. # The benchmark tests load a real sentence-transformers model # (all-MiniLM-L6-v2) on purpose. 
If the matrix legs each populated the cache diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 05afd77..6f82485 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -131,5 +131,12 @@ jobs: run: | pip install build python -m build + # Gate the publish on package-data integrity: verify the freshly built + # wheel actually bundles the required non-Python data (schema SQL + + # synthetic dataset) before it can reach PyPI. Catches a packaging + # regression (e.g. a file falling out of [tool.setuptools.package-data]) + # at the one point where it would otherwise ship broken. + - name: Verify wheel package data + run: python scripts/verify_wheel_data.py - name: Publish to PyPI (OIDC, no token) uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml new file mode 100644 index 0000000..c33225f --- /dev/null +++ b/.github/workflows/secret-scan.yml @@ -0,0 +1,52 @@ +name: Secret scanning + +# Generic secret detection (gitleaks, built-in rule set). Catches accidental +# commits of API keys, tokens, .env files, private keys, etc. from any +# contributor. Uses only gitleaks' built-in patterns — no project-specific +# rules — so this workflow reveals nothing about internal naming or structure. +# +# gitleaks is run from its official release binary in a plain `run:` step rather +# than via the gitleaks-action: this repository's Actions policy allows only +# GitHub-owned (actions/*) and verified-publisher actions, and the gitleaks +# action is neither. Pinning the binary by version + SHA-256 gives a reproducible +# scan without taking a dependency on a non-allowed third-party action. 
+ +on: + push: + branches: [main, dev] + pull_request: + branches: [main, dev] + +permissions: + contents: read + +env: + GITLEAKS_VERSION: "8.30.1" + GITLEAKS_SHA256: "551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb" + +jobs: + gitleaks: + name: Gitleaks scan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + # Full history so the scan covers the whole reachable commit range, + # not just the tip — a secret introduced in any commit is caught. + fetch-depth: 0 + + - name: Install gitleaks (pinned binary, checksum-verified) + run: | + set -euo pipefail + url="https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz" + curl -sSfL "$url" -o gitleaks.tar.gz + echo "${GITLEAKS_SHA256} gitleaks.tar.gz" | sha256sum -c - + tar -xzf gitleaks.tar.gz gitleaks + chmod +x gitleaks + ./gitleaks version + + - name: Scan repository history + run: | + set -euo pipefail + # `git` mode scans the commit history; exit code 1 = leaks found. + ./gitleaks git . --redact --verbose --exit-code 1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 254490b..8ac94e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,15 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/ always re-disabled after the load attempt, even when the load fails, so a connection is never left with extension loading enabled. +### Changed + +- **CI hardening (maintenance, no runtime impact).** Continuous integration + now runs a generic secret scan (gitleaks, built-in rules; on pushes to and + pull requests targeting `main`/`dev`) and a dependency vulnerability audit + (`pip-audit --strict`), and the release pipeline verifies that the built + wheel bundles its required package data before publishing to PyPI. These are + tooling/CI changes only — no change to the installed package or its behaviour. 
+ ## 0.3.0 (2026-06-02) * ci: add on-demand smoke-gate workflow (#10) ([50e2bf2](https://github.com/sovantica/engrava/commit/50e2bf2)), closes [#10](https://github.com/sovantica/engrava/issues/10)