From b5a57f6aa0b1f833f6b8bfac8d6ba86bbaf70877 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 6 Jun 2026 23:36:38 +0200
Subject: [PATCH 1/3] ci: add secret scan + dependency audit; verify wheel data
 on publish

Add generic, public-safe CI hardening:

- secret-scan.yml: gitleaks (official pinned binary, checksum-verified,
  run in a plain step since the repo Actions policy allows only
  GitHub-owned and verified-publisher actions) scans the full history
  on push and PR for accidentally committed secrets.
- ci.yml: a dependency-audit job runs pip-audit --strict so a dependency
  with a known CVE (or any audit collection failure) fails the build.
- release.yml: the publish job verifies the freshly built wheel bundles
  its required package data before uploading to PyPI, catching a
  packaging regression at the one point it would otherwise ship.

Tooling/CI only; no change to the installed package or its behaviour.
---
 .github/workflows/ci.yml          | 21 +++++++++++++
 .github/workflows/release.yml     |  7 +++++
 .github/workflows/secret-scan.yml | 52 +++++++++++++++++++++++++++++++
 CHANGELOG.md                      |  9 ++++++
 4 files changed, 89 insertions(+)
 create mode 100644 .github/workflows/secret-scan.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b77c2ff..e387758 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,6 +38,27 @@ jobs:
       - name: Mypy
         run: mypy --strict src/
 
+  dependency-audit:
+    name: Dependency CVE audit
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+      # Install the package so pip-audit resolves the real dependency tree
+      # (engrava itself is now on PyPI, so --strict no longer trips on the
+      # editable self-package). --strict fails the run on any collection
+      # failure as well as on a known CVE, so a vulnerable or unauditable
+      # dependency cannot pass silently.
+      - name: Install package
+        run: pip install -e "."
+      - name: Run pip-audit
+        run: |
+          pip install pip-audit
+          pip-audit --strict
+
   # Warm the HuggingFace model cache exactly once before the test matrix.
   # The benchmark tests load a real sentence-transformers model
   # (all-MiniLM-L6-v2) on purpose. If the matrix legs each populated the cache
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 05afd77..6f82485 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -131,5 +131,12 @@ jobs:
         run: |
           pip install build
           python -m build
+      # Gate the publish on package-data integrity: verify the freshly built
+      # wheel actually bundles the required non-Python data (schema SQL +
+      # synthetic dataset) before it can reach PyPI. Catches a packaging
+      # regression (e.g. a file falling out of [tool.setuptools.package-data])
+      # at the one point where it would otherwise ship broken.
+      - name: Verify wheel package data
+        run: python scripts/verify_wheel_data.py
       - name: Publish to PyPI (OIDC, no token)
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml
new file mode 100644
index 0000000..c33225f
--- /dev/null
+++ b/.github/workflows/secret-scan.yml
@@ -0,0 +1,52 @@
+name: Secret scanning
+
+# Generic secret detection (gitleaks, built-in rule set). Catches accidental
+# commits of API keys, tokens, .env files, private keys, etc. from any
+# contributor. Uses only gitleaks' built-in patterns — no project-specific
+# rules — so this workflow reveals nothing about internal naming or structure.
+#
+# gitleaks is run from its official release binary in a plain `run:` step rather
+# than via the gitleaks-action: this repository's Actions policy allows only
+# GitHub-owned (actions/*) and verified-publisher actions, and the gitleaks
+# action is neither. Pinning the binary by version + SHA-256 gives a reproducible
+# scan without taking a dependency on a non-allowed third-party action.
+
+on:
+  push:
+    branches: [main, dev]
+  pull_request:
+    branches: [main, dev]
+
+permissions:
+  contents: read
+
+env:
+  GITLEAKS_VERSION: "8.30.1"
+  GITLEAKS_SHA256: "551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb"
+
+jobs:
+  gitleaks:
+    name: Gitleaks scan
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          # Full history so the scan covers the whole reachable commit range,
+          # not just the tip — a secret introduced in any commit is caught.
+          fetch-depth: 0
+
+      - name: Install gitleaks (pinned binary, checksum-verified)
+        run: |
+          set -euo pipefail
+          url="https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz"
+          curl -sSfL "$url" -o gitleaks.tar.gz
+          echo "${GITLEAKS_SHA256}  gitleaks.tar.gz" | sha256sum -c -
+          tar -xzf gitleaks.tar.gz gitleaks
+          chmod +x gitleaks
+          ./gitleaks version
+
+      - name: Scan repository history
+        run: |
+          set -euo pipefail
+          # `git` mode scans the commit history; exit code 1 = leaks found.
+          ./gitleaks git . --redact --verbose --exit-code 1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 254490b..8ac94e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,15 @@ and this project adheres to [Semantic Versioning 2.0.0](https://semver.org/spec/
   always re-disabled after the load attempt, even when the load fails, so a
   connection is never left with extension loading enabled.
 
+### Changed
+
+- **CI hardening (maintenance, no runtime impact).** Continuous integration
+  now runs a generic secret scan (gitleaks, built-in rules) and a dependency
+  vulnerability audit (`pip-audit --strict`) on every push and pull request,
+  and the release pipeline verifies that the built wheel bundles its required
+  package data before publishing to PyPI. These are tooling/CI changes only —
+  no change to the installed package or its behaviour.
+
 ## 0.3.0 (2026-06-02)
 
 * ci: add on-demand smoke-gate workflow (#10) ([50e2bf2](https://github.com/sovantica/engrava/commit/50e2bf2)), closes [#10](https://github.com/sovantica/engrava/issues/10)

From 6b5eb06112898dfa5606beef799ce16b3e61cea9 Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 6 Jun 2026 23:39:32 +0200
Subject: [PATCH 2/3] ci: install package non-editable for the dependency audit

pip-audit --strict reports an editable install as a SkippedDependency
and hard-fails on the skip ("distribution marked as editable"). Install
the package non-editable (pip install .) so pip-audit audits the engrava
distribution itself; --strict still fails on a real CVE or collection
error.
---
 .github/workflows/ci.yml | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e387758..01dda7f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,13 +47,15 @@ jobs:
         with:
           python-version: "3.12"
           cache: "pip"
-      # Install the package so pip-audit resolves the real dependency tree
-      # (engrava itself is now on PyPI, so --strict no longer trips on the
-      # editable self-package). --strict fails the run on any collection
-      # failure as well as on a known CVE, so a vulnerable or unauditable
-      # dependency cannot pass silently.
+      # Install the package NON-editable so pip-audit can resolve and audit the
+      # engrava distribution itself. An editable install (`-e`) builds an
+      # `*.editable` wheel that pip-audit reports as a SkippedDependency, and
+      # `--strict` hard-fails on ANY skip ("distribution marked as editable").
+      # A plain `pip install .` produces a normal, auditable distribution, so
+      # --strict still fails on a real CVE or collection failure — without
+      # tripping on the self-package.
       - name: Install package
-        run: pip install -e "."
+        run: pip install .
       - name: Run pip-audit
         run: |
           pip install pip-audit

From de7b4579bb4c1b37faeca61b8d34765bb441b77d Mon Sep 17 00:00:00 2001
From: przemarzec <98286080+przemarzec@users.noreply.github.com>
Date: Sat, 6 Jun 2026 23:47:18 +0200
Subject: [PATCH 3/3] ci: upgrade pip before the dependency audit

pip-audit --strict audits the whole environment, including pip itself.
The runner image shipped pip 26.1.1, which has a known advisory
(PYSEC-2026-196, fixed in 26.1.2), failing the strict audit. Upgrade pip
to latest before auditing so the finding is resolved for real, not
ignored. The engrava distribution and its dependencies already audit
clean.
---
 .github/workflows/ci.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 01dda7f..99cb777 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,7 +55,13 @@ jobs:
       # --strict still fails on a real CVE or collection failure — without
       # tripping on the self-package.
       - name: Install package
-        run: pip install .
+        run: |
+          # Upgrade pip first: pip-audit --strict audits the whole environment,
+          # including pip itself, and the runner image can ship a pip with a
+          # freshly disclosed CVE (e.g. PYSEC-2026-196 in pip 26.1.1). Upgrading
+          # to the latest pip clears it for real rather than ignoring the finding.
+          python -m pip install --upgrade pip
+          pip install .
       - name: Run pip-audit
         run: |
           pip install pip-audit