diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml index 28c01a8..79e8787 100644 --- a/.github/workflows/terraform.yml +++ b/.github/workflows/terraform.yml @@ -24,8 +24,8 @@ jobs: name: terraform fmt runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: hashicorp/setup-terraform@v3 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 with: terraform_version: ${{ env.TF_VERSION }} - run: terraform -chdir=terraform fmt -check -recursive @@ -34,8 +34,8 @@ jobs: name: terraform validate runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: hashicorp/setup-terraform@v3 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 with: terraform_version: ${{ env.TF_VERSION }} - run: terraform -chdir=terraform init -backend=false @@ -45,8 +45,8 @@ jobs: name: terraform test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: hashicorp/setup-terraform@v3 + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + - uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 with: terraform_version: ${{ env.TF_VERSION }} - run: terraform -chdir=terraform init -backend=false diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 79983c8..cefdd2e 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -15,19 +15,49 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: "3.11" - name: Generate docs run: python update_outputs.py + - name: Verify 
SHA256SUMS + # Sanity-check what we're about to publish — every recorded hash must + # match the on-disk file. Catches local corruption before push. + run: | + cd docs/output + sha256sum -c SHA256SUMS + - name: Commit and push docs + id: commit run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git add docs/ - git diff --staged --quiet || (git commit -m "chore: update IP ranges and docs [weekly]" && git push) + if git diff --staged --quiet; then + echo "changed=false" >> "$GITHUB_OUTPUT" + echo "No changes to publish." + else + git commit -m "chore: update IP ranges and docs [weekly]" + git push + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Tag release + # Tag every publication that changed docs/ so consumers can pin a stable ?ref=. + # Format: v<YYYY.MM.DD> (UTC). An existing tag (same-day rerun) is kept — tags are immutable here. + # The check asks origin because the default shallow checkout fetches no tags locally. + if: steps.commit.outputs.changed == 'true' + run: | + TAG="v$(date -u +%Y.%m.%d)" + if git ls-remote --exit-code --tags origin "refs/tags/$TAG" >/dev/null 2>&1; then + echo "Tag $TAG already exists — leaving it pointing at the earlier commit." + else + git tag -a "$TAG" -m "Weekly Databricks IP feed update — $TAG" + git push origin "$TAG" + echo "Tagged $TAG." + fi diff --git a/README.md b/README.md index cb2a903..792b9ea 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,10 @@ For Terraform-heavy shops that prefer to wire CIDRs directly into their existing **[→ Terraform Module](terraform/)** +For the threat model, recommended pinning patterns, and how to verify integrity of the published feeds, see: + +**[→ Security Model](SECURITY.md)** + For full CLI options, run `python extract-databricks-ips.py --help`. ## Disclaimer diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..c06adea --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,138 @@ +# Security Model + +This repo publishes IP CIDR feeds that downstream consumers wire into firewalls. 
**Tampering with those feeds could open networks to attacker-controlled IPs or remove legitimate Databricks IPs and break workloads.** This document is the threat model and the recommended consumption patterns. + +There is no perfect defense for any supply-chain dependency — you ultimately trust *some* publisher. The job is to make the trust boundary explicit, make tampering visible, and give you tools to verify before you apply. + +## Trust boundary + +When you consume from this repo, you are trusting: + +1. The contents of the GitHub repo at the commit/tag you reference +2. Anyone with push access to that repo +3. The weekly GitHub Actions workflow that regenerates feeds +4. The upstream `https://www.databricks.com/networking/v1/ip-ranges.json` endpoint +5. GitHub itself (Pages, Actions, repo storage) + +You can shrink this boundary substantially with a few patterns below. + +## Highest-leverage defense: pin to a tag or commit SHA + +This is the single most important thing. **Always pin `?ref=` to a tag or commit SHA in your Terraform module source, EDL URL, or wherever you reference these feeds.** + +```hcl +# Recommended — tag pin (bumps weekly with publication) +module "dbx_ips" { + source = "github.com/bhavink/databricksIPranges//terraform?ref=v2026.05.05" + ... +} + +# Strictest — commit SHA (you control exactly when CIDRs change) +module "dbx_ips" { + source = "github.com/bhavink/databricksIPranges//terraform?ref=a1b2c3d" + ... +} + +# Don't — tracks main, every plan re-resolves, exposes you to unreviewed changes +module "dbx_ips" { + source = "github.com/bhavink/databricksIPranges//terraform?ref=main" + ... +} +``` + +Pinning means: a compromise of `main` does not automatically reach you. You only get new CIDRs when *you* explicitly bump the `?ref=` in your own repo, which goes through your own PR review. 
+ +## Defenses already in place (publish side) + +This repo applies these by default: + +| Defense | What it does | +|---|---| +| `SHA256SUMS` published with every release | Every `output/*.txt` file's hash is committed alongside the file. Consumers can verify before applying. | +| Weekly tagging (`v<YYYY.MM.DD>`) | Each publication gets an immutable tag. Pin to the tag. | +| GitHub Actions deps pinned by SHA | All third-party actions use commit SHA pins (not floating tags), preventing transitive action supply-chain attacks. | +| `permissions: contents: write` only | The weekly workflow has the minimum permissions needed; no secrets, no tokens beyond GITHUB_TOKEN. | +| Branch protection on `main` | Required PR review, no force push, no direct commits. | +| Reproducible publication | `update_outputs.py` is deterministic given the same `ip-ranges.json` snapshot. Anyone can re-run it and verify byte-identical output. | + +## Recommended consumption patterns by paranoia level + +### Default (most consumers) + +- Pin `?ref=v<YYYY.MM.DD>` to a recent tag in your TF module source +- Bump the tag periodically (monthly, or when notified of CIDR changes) +- Your `terraform plan` PR shows the CIDR diff — review before merging + +This neutralizes most attacks because tampering between publications doesn't reach you. + +### Stricter (regulated / high-stakes) + +- Pin `?ref=` to a specific commit SHA +- Verify `SHA256SUMS` at fetch time (planned in PR 4 — TF module hash verification) +- Bump the SHA via a dedicated review PR with explicit security-team approval + +### Most paranoid (airgapped / nation-state threat model) + +- **Vendor the feeds into your own repo.** Commit `aws-us-east-1.txt` etc. directly to your IaC repo. TF reads via `data "local_file"`, no runtime dependency on this repo. +- A scheduled job (Renovate, scheduled GH Action) opens a "refresh CIDRs" PR with the new content, signed by your own automation. The diff is reviewed in your repo, by your team, before any apply. 
+- Removes this repo's feeds from your runtime trust chain. The module code itself is still fetched at `terraform init`, so keep the `?ref=` pin shown below. + +```hcl +module "dbx_ips" { + source = "github.com/bhavink/databricksIPranges//terraform?ref=v2026.05.05" + cloud = "azure" + source_files = ["${path.module}/vendored/azure-eastus.txt"] +} +``` + +## Verifying integrity manually + +Every published `output/*.txt` is hashed in `output/SHA256SUMS`. To verify: + +```bash +cd /tmp +curl -sO https://bhavink.github.io/databricksIPranges/output/SHA256SUMS +curl -sO https://bhavink.github.io/databricksIPranges/output/azure-eastus.txt +sha256sum -c SHA256SUMS --ignore-missing +# azure-eastus.txt: OK +``` + +If a file's hash doesn't match its `SHA256SUMS` entry, **do not use that file.** Open an issue. + +## Reproducible build verification + +The publication is deterministic. To verify a published file is what `update_outputs.py` would produce from a known-good `ip-ranges.json` snapshot: + +```bash +git clone https://github.com/bhavink/databricksIPranges +cd databricksIPranges + +# Pick a JSON snapshot from json-history/ +SNAPSHOT=docs/json-history/ip-ranges-20260504-1712.json + +# Run the extractor against that snapshot +python extract-databricks-ips.py --file "$SNAPSHOT" --cloud azure --region eastus > /tmp/expected.txt + +# Compare against the published feed (or your fork's published feed) +diff /tmp/expected.txt docs/output/azure-eastus.txt +# (should be empty) +``` + +If the published feed was generated from that same snapshot, any divergence is evidence of tampering (or of a non-deterministic build) — investigate before applying. + +## What's deliberately out of scope + +- **Compromise of GitHub itself, GitHub Pages, GitHub Actions infrastructure, or the Databricks publishing endpoint** — out of our control. Mitigated only by SHA pinning + vendoring (which use git's content-addressed storage). +- **Compromise of the upstream Databricks JSON URL** — if upstream is hijacked, this repo's feeds are also wrong. Detection: `validate_against_upstream` (planned, opt-in) cross-checks our published feed against the live Databricks JSON. 
Mitigation: pin to a known-good SHA from before the suspected compromise. +- **Solo-maintainer account compromise** — if my GitHub account is compromised, every defense above except reproducible-build verification breaks. The only real mitigation is an independent watchdog repo (out of scope for now). +- **Long-tail "stale fork goes bad"** — customers who don't review CIDR diffs before bumping `?ref=` can still consume a poisoned commit. The human gate (review the diff) is irreplaceable. + +## Reporting a vulnerability + +If you find a tampering pattern, a vulnerability in the publication pipeline, or a missing defense — **please do not open a public issue.** Email the maintainer directly (LinkedIn DM is fine if you don't have an email). + +## Future work + +- **PR 4 (planned):** TF module hash verification — fetch `SHA256SUMS` at plan time, compare against the hash of each fetched feed, fail the plan on mismatch. Defense-in-depth against single-file tampering between publications. +- **Optional:** SLSA provenance attestation via GitHub OIDC + Sigstore. Adds verifiable "this artifact was built from this workflow run on this commit." Not needed for v1. +- **Optional:** Independent watchdog repo on a separate identity that publishes the same files via the same logic. Defeats single-account compromise. Real defense, not yet implemented. 
diff --git a/docs/output/SHA256SUMS b/docs/output/SHA256SUMS new file mode 100644 index 0000000..840a72e --- /dev/null +++ b/docs/output/SHA256SUMS @@ -0,0 +1,89 @@ +5825dc0069b286d9514a326080847bbbbc9d17d74c1743cd7436bdd41889a639 all-inbound.txt +2bb1fdbdef78c661d43357dcfb660f9a7d9177fc1e97876438ccda1a2fd92e1c all-outbound.txt +f8b28bcf4ef3ed984a99ebdd5ac19cb344f7057b92190e2a17aa30e425dfdba2 all.txt +b0aae5a5b1759420d7c72e62efff0ec78c18a4b52e78db025b6b1209cadee1e0 aws-ap-northeast-1.txt +66ca05288245c57d28037982b6045217163f4bfbf93700ebb53a3dfae25a8b08 aws-ap-northeast-2.txt +d324a0338ac54e4a32b18274c1a56830217ab57ad1e1ffbe8d64dd22f31e9894 aws-ap-south-1.txt +cf4b454c58c01059505373708f80747265df2a7ebc5c5e708606dcb36b68e77a aws-ap-southeast-1.txt +a973dabe7793ce80f9c75aa270f5bb2a14f719284b7346d2aa3768a2e47b59b4 aws-ap-southeast-2.txt +e6a7cce50f360f7fff068a31e828672aea011ed4cc8397ff52b63d4d753cc2d5 aws-ap-southeast-3.txt +16049d29efeeff6d10fb3c34411bf05170817adb87358a600f62856d7b0c7c1b aws-ca-central-1.txt +de2c7cb08132d4a03e7c7d8a124baf67561426ee3e773cf3fa25576e4f8a0e45 aws-eu-central-1.txt +3b812c1d015f68a380722c083b922188bed2e4f07d288bff37b961c5a761e53f aws-eu-north-1.txt +13b436c3e3e60437746422429e4a46e8ea965bbf67074c9e53107e6126be5ff2 aws-eu-west-1.txt +98f70b55f608fa00ae9fc30652484cb947bb655ad2b0f3bbb3c969e1b4a74713 aws-eu-west-2.txt +803690dfe6b2cd40c647f37ad3bef36981d6d813352a1c51f0f42eff7a31d355 aws-eu-west-3.txt +d2e67ed076d8c36353b2483c87b8c0eb1d53530121957d2f7bc7d70262b950f4 aws-inbound.txt +0d68405388e953a94d18b5685c23c5d1826883f21b000460dd4fcbbfa84365a6 aws-outbound.txt +3826cf39b2d27aacbdedf38f23847ab5c58f0546b6fcf74197d5ddc5f0b6dec1 aws-sa-east-1.txt +e93fb1c2b25a7f89c0022ff36d79145b3256ee74f549f19aef65337a2a02187c aws-us-east-1.txt +a9fd2fd2da73594c6a51cdccd8202c7c6ee35d0c5c38bb4c94c0b6ab8aed0cae aws-us-east-2.txt +ba6bf452a8f85d55c297df8ae319161ad4387978d094cea9408dfe3513456a97 aws-us-gov-west-1.txt 
+dfcc743d1d2c765c3c600c50dae387394b796da229355a6ff85e2ec8817151f0 aws-us-west-1.txt +f4d20990757934e45ef4cdb50811c80f6dc5e44fc666c85969aeaa126f36947d aws-us-west-2.txt +5350fa78b41d385bad2598a6ce396d0e693ebb673e4661901d1189f0716166e9 aws.txt +739cb7dd239bb16575c9fc7286370a84854d3e5bfa95fbdc2d9a9ae08948b700 azure-australiacentral.txt +7730eaf8cc33185afd6e897c0d2c41c2489c0d691c03718948e3e433e11c664f azure-australiacentral2.txt +80cf28770361c7be1d3319ced67990630b99c6ebd99d1ea84c615f381480ee05 azure-australiaeast.txt +5e76d32b36b2616788de748c2669a14266c02ff14a923b9c4eb3e9770b68bf57 azure-australiasoutheast.txt +12b0a94714e27b7b17e6b0ccd9e8fcdaf33df906d8a7d3d8e5c115f13a0e0bde azure-brazilsouth.txt +109cd2df007e65affb3c96167b61fa97d11463f012c02ca990a27040a693fcaf azure-canadacentral.txt +6ced55273006b973a1ca7aef00433fdd03c0b05b4ae547e1ea9251e9f00a204f azure-canadaeast.txt +e66c0e99ce9bf62553e009a556994ded1cc063afda1d738c92d3e7e04a246586 azure-centralindia.txt +28d90557daa1c19bf5a9397ccf22778c6714fa04fb861c45029fff5a2bedb1bf azure-centralus.txt +6ee63cc084f5ca5c158b17c8d35766712d574a00dc59edd243f87c1a6dfae288 azure-chinaeast2.txt +06e714d2bad8583deff1d06dce743609159194c95b08fc9de06277b0658d8bea azure-chinaeast3.txt +c036bf7cfdfd4d2957d77f537fed790450ea484bb2012e483f3aa65166ba0c14 azure-chinanorth2.txt +03356cd13761c219289d88cf5d028e09e992e9c51219f97e2ed8bd28db632c00 azure-chinanorth3.txt +f6dd5d501cb48ab8d9dd4ea78ac3dd546a69d5d818b7d3b6f38815dbe4d8ef38 azure-eastasia.txt +605c172ee21ff08482cbeaf2d833a4d461b88613e72f9227d1cfce1327a9e3bd azure-eastus.txt +f6536b87a77d9e7115707f207154249523a75e6db8255088a1da9e983807093b azure-eastus2.txt +ffc290646d02e24948d7f6ff692b6f9c5b84ef139086346a42d6471283214d9b azure-francecentral.txt +24a98bec8147143e6bd72f393b52c3c10215998b83005c221a1a83e420750d96 azure-germanywestcentral.txt +e688a99099509ce08a111a661d96bcd062fcbefc322fd46dacc41edf6a2fa23d azure-inbound.txt +d459d7a346a3efabb0eba2af2aa5986efb888f931525b1467cd5115a0232aae7 
azure-japaneast.txt +fba161f81b22b1fff1eb6aac536f0c1c35f63cc917259540b817fca043c85d5c azure-japanwest.txt +f2c947f9237be6b8081ac64683ac58f94cf5b94ef66b21b4484ea2f9cfb4d9a8 azure-koreacentral.txt +b3566931b0089152f230228518566d4e0f82961806f9dc329eda06d964a93ef3 azure-mexicocentral.txt +93bf836bbb9bea3c4f6dc84e1959718d6b1ee8ad814c15c1d722a8de74c7fd50 azure-northcentralus.txt +b2c5cc154119f734ef0c76f5c813fcfd9123f41eb90711dd411496bef8dbda22 azure-northeurope.txt +73c32d5060976db3e9daaaf05b1977996b8b888098a2a24eb067b4d4ca809550 azure-norwayeast.txt +7fb9a2f5127c9d284edeead55d7662619ee743279de800ce419a34e806865f80 azure-outbound.txt +4a9ff4edb6de7fa7b5798a9822a6f0f23fd8bd3b95329df881933f5adb681c9e azure-qatarcentral.txt +e38b6c2141fcce2c1a581e8091bc4be4ee1603307cd1c885dc3f834994c7e39b azure-southafricanorth.txt +2e1927985c25033991c9049c2ef7e9697767dc2a6aa28e87477794437f5cc72e azure-southcentralus.txt +bca0b3322ee7370e24b4c6b5fbf8d19a77e703e5ce1b5eef26a9217a965a93bd azure-southeastasia.txt +19564aca64ef1747566e44b71ce208b93b1964765fb6ca68a209cef9a6c184c3 azure-southindia.txt +6ccffef7d3f182b9e5b4b168f5437f833c5aca599560649bb697afe41764360d azure-swedencentral.txt +934c61af4b77c8faa8504176fda8b9a790b61493098c9f00e56d133cae1ec99f azure-switzerlandnorth.txt +ecc2d9d58a7c2e2265ff2aedd378cde7691215ef3ea0d97dbcfa2ef3327c4ad3 azure-switzerlandwest.txt +7ddd06ba5db70e1ba120c09aa23dab87604bf455359b6efafaebaf98e868a12f azure-uaenorth.txt +9edccb91b29f6d0cb3925b5daecb644a56ceac4e2c3e18cbb1b912c3cd69853f azure-uksouth.txt +5de64b9871ab84b6e09faa00942975bc7b2eea323e17cc5e5dd917e33a43544f azure-ukwest.txt +4f47e3ee6364669d02dc727842bb8a2a93e42f8900d96fa4765374ac2a1e6225 azure-usgovarizona.txt +d3061c27e58038e09a4d51cafa44cfcf7680dfcfcdb38545cbca656455de1d8f azure-usgovvirginia.txt +dbe7f95a895ec1133180d2cd4a8bcd3572a53c41aeff4409cb5ae74498cb1194 azure-westcentralus.txt +02db68003663433c18ed1fc40aecb941ec18c76b52636da47fcc6c9f78ba3199 azure-westeurope.txt 
+97b8d8f8b9aa4e9eec86bcc4b0c1b11df56461429573aa6fb773cdf7dedee8b0 azure-westindia.txt +35af12ca979d5dd12a798ef1aa1dbc821f75b230812487b8940b650126fbba4b azure-westus.txt +0fd37c374e39d54258d6c568fcd36f0e21c03df124a143abdfdd5eecfc7cf25a azure-westus2.txt +67a103a0f951ea4c1e3c5120a44422131d4b11e2f40c6daa0683468eb8208f75 azure-westus3.txt +30246d305a528673b2d797c38ac17b095d91825c84203a02b6ab01cd2ec6aa46 azure.txt +dad1728af915168c00c40883146ff19d6c74c4b10a317e717ef5161c3e94fed8 gcp-asia-northeast1.txt +afef578ec47413a8138ba41522824776d4e593b17017224fb5235c90da22dd82 gcp-asia-south1.txt +332b227e90c10619b659e23ddb072c90de39486299ffd4b9a59c1d3b97110fb6 gcp-asia-southeast1.txt +7a474e52591acecb2b40a3b35f207d9d39fa02e056c3921c55fad5843e710b7f gcp-australia-southeast1.txt +06382e19edd80fdd807a263f7e8e59a3688ac3fd955e4d029d1e56c77fcee28b gcp-europe-west1.txt +540a1f7066e17291c33b76a4afe05f9b48520a2f80d387e391273aff81a0c00f gcp-europe-west2.txt +4291598d216c265189a94240c3337a261268e86a98e1a97b32f9420495d7ad21 gcp-europe-west3.txt +18e04d227be60383ac370106a95772ae8e5f3db1aeb863d08276d69346162049 gcp-inbound.txt +ad64c448af73ebff5fe4b204c9df33006b7b702cd9fb1045ce79561e5c8768df gcp-me-central2.txt +f3dbe42e7454ff90a5a0bb62ec9283434457a69555f411b44833385b0c908634 gcp-northamerica-northeast1.txt +9d1b0faa1fed3a54dca7009305d77f06566045bc7fac794e2abdc28fccd17e71 gcp-outbound.txt +7f9ec2dbbc5abb56539df8a2c2a250b582909b19d1c28b6d89081725cdd83c75 gcp-southamerica-east1.txt +ae9a9630675c820e8ba9a4b021196d26daccd1c590a76610cb0b6a1da5925208 gcp-us-central1.txt +4bf44b1864be9d8152f8651ff039b0a97f98e9e586b0a59305c60a1683f826c5 gcp-us-east1.txt +2f3889150ba44c6697f765d9bb8dfe9fe7bff4a96b0a005b3c6f8ca806342e8e gcp-us-east4.txt +c56e7ae0840d1cb5b73f536ced890dfc6d12812816cc568660c6937c17cac490 gcp-us-west1.txt +3ec457f018f724a8462572db85c6eee7dd00f1beced683874cafb576919cc5e1 gcp-us-west4.txt +b90eb2c6f7c6b07cca412a333000882cfb1903dea67a130f3f545e4a17df487b gcp.txt diff --git 
a/docs/output/index.html b/docs/output/index.html index 8257ebb..2e36394 100644 --- a/docs/output/index.html +++ b/docs/output/index.html @@ -1,7 +1,7 @@ Directory Index – Databricks IP Ranges - +

Directory Index

Click on a file to download:

@@ -95,7 +95,9 @@

Directory Index

  • gcp-us-west1.txt
  • gcp-us-west4.txt
  • gcp.txt
  • +
  • SHA256SUMS
  • -

    Generated on 2026-05-04 17:17:10 UTC

    +
    Verify integrity: curl -sO <url>/SHA256SUMS && sha256sum -c SHA256SUMS
    +

    Generated on 2026-05-05 15:39:57 UTC

    Back to databricksIPranges

    \ No newline at end of file diff --git a/terraform/README.md b/terraform/README.md index 568fdd7..1681b7d 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -146,6 +146,8 @@ A periodic job in your repo (e.g. Renovate, a scheduled GH Action) updates the v **Why pin:** Without it, every `terraform plan` re-resolves `main` and could surface CIDR diffs you haven't reviewed. Pinning makes the bump an explicit PR in your repo. +For the full threat model and recommended patterns by paranoia level (default → strict → airgapped/vendored), see [SECURITY.md](../SECURITY.md). + --- ## Debugging diff --git a/test_update_outputs.py b/test_update_outputs.py index e9c1ae0..6746d5c 100644 --- a/test_update_outputs.py +++ b/test_update_outputs.py @@ -135,5 +135,61 @@ def test_output_index_lists_region_files(): assert "azure-eastus.txt" in index_html +def test_sha256sums_emitted(): + """SHA256SUMS file must be generated alongside the txt files.""" + out = _run_main() + assert (out / "SHA256SUMS").is_file(), "SHA256SUMS file missing" + + +def test_sha256sums_format_is_gnu(): + """Every line: <64-hex-digest>SPSP<filename>. 
Compatible with `sha256sum -c`.""" + import re + out = _run_main() + content = (out / "SHA256SUMS").read_text() + lines = [line for line in content.splitlines() if line.strip()] + assert len(lines) > 0 + for line in lines: + assert re.match(r"^[0-9a-f]{64} \S+", line), f"Bad SHA256SUMS line: {line!r}" + + +def test_sha256sums_contents_are_correct(): + """For each line, the recorded digest must match the on-disk file's actual sha256.""" + import hashlib + out = _run_main() + content = (out / "SHA256SUMS").read_text() + for line in content.splitlines(): + if not line.strip(): + continue + digest, name = line.split(" ", 1) + target = out / name + assert target.is_file(), f"SHA256SUMS references missing file: {name}" + actual = hashlib.sha256(target.read_bytes()).hexdigest() + assert actual == digest, f"Digest mismatch for {name}: recorded={digest}, actual={actual}" + + +def test_sha256sums_does_not_include_self(): + """SHA256SUMS must not list itself (avoids chicken-and-egg verification).""" + out = _run_main() + content = (out / "SHA256SUMS").read_text() + assert "SHA256SUMS" not in [line.split(" ", 1)[1] for line in content.splitlines() if " " in line] + + +def test_sha256sums_covers_all_txt_files(): + """Every .txt file in output/ must have a SHA256SUMS entry.""" + out = _run_main() + txt_files_on_disk = {p.name for p in out.iterdir() if p.suffix == ".txt"} + sha_lines = (out / "SHA256SUMS").read_text().splitlines() + sha_files = {line.split(" ", 1)[1] for line in sha_lines if " " in line} + missing = txt_files_on_disk - sha_files + assert not missing, f"SHA256SUMS missing entries for: {missing}" + + +def test_output_index_links_sha256sums(): + """The directory index must link to SHA256SUMS so consumers can find it.""" + out = _run_main() + index_html = (out / "index.html").read_text() + assert "SHA256SUMS" in index_html + + if __name__ == "__main__": sys.exit(__import__("pytest").main([__file__, "-v"])) diff --git a/update_outputs.py b/update_outputs.py index 
a308b4e..70df4a0 100644 --- a/update_outputs.py +++ b/update_outputs.py @@ -4,6 +4,7 @@ Run locally or from GitHub Actions. Uses extract-databricks-ips.py (no extra deps). """ +import hashlib import importlib.util import json from datetime import datetime, timezone @@ -101,22 +102,37 @@ def main(): (OUTPUT_DIR / filename).write_text(out_str.strip() + "\n") region_files.append(filename) + # SHA256SUMS — committed alongside the txt files so consumers can verify + # integrity at fetch time. GNU sha256sum format (" "), + # compatible with `sha256sum -c SHA256SUMS`. + txt_files = [fn for (_, _, fn) in outputs] + region_files + checksum_lines = [] + for fn in sorted(txt_files): + path = OUTPUT_DIR / fn + digest = hashlib.sha256(path.read_bytes()).hexdigest() + checksum_lines.append(f"{digest} {fn}") + (OUTPUT_DIR / "SHA256SUMS").write_text("\n".join(checksum_lines) + "\n") + # Directory index for output/ (like azureIPranges ranges-services-pa) generated_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") - txt_files = [fn for (_, _, fn) in outputs] + region_files + indexed_files = sorted(txt_files) + ["SHA256SUMS"] output_index_lines = [ "", "", "Directory Index – Databricks IP Ranges", - "", + "", "", "

    Directory Index

    ", "

    Click on a file to download:

    ", "
      ", ] - for fn in sorted(txt_files): + for fn in indexed_files: output_index_lines.append(f'
    • {fn}
    • ') output_index_lines.append("
    ") + output_index_lines.append( + "
    Verify integrity: " + "curl -sO <url>/SHA256SUMS && sha256sum -c SHA256SUMS
    " + ) output_index_lines.append(f"

    Generated on {generated_utc}

    ") output_index_lines.append(f"

    Back to databricksIPranges

    ") output_index_lines.append("")