From a1184384811a065d8b0774cfa22e07f8654cdaaa Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Thu, 18 Jun 2026 11:11:57 -0400 Subject: [PATCH] Update certified US Populace release --- changelog.d/populace-996401a.changed | 1 + .../data/release_manifests/us.json | 44 +++++++++---------- .../release_manifests/us.trace.tro.jsonld | 25 +++++------ src/policyengine/provenance/manifest.py | 7 ++- src/policyengine/provenance/trace.py | 9 ++-- tests/test_certify_data_release.py | 9 +--- tests/test_models.py | 2 +- tests/test_release_manifests.py | 6 +-- 8 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 changelog.d/populace-996401a.changed diff --git a/changelog.d/populace-996401a.changed b/changelog.d/populace-996401a.changed new file mode 100644 index 00000000..48f9378e --- /dev/null +++ b/changelog.d/populace-996401a.changed @@ -0,0 +1 @@ +Update the certified US Populace dataset to the incumbent-improved 2024 release. diff --git a/src/policyengine/data/release_manifests/us.json b/src/policyengine/data/release_manifests/us.json index ae590a47..6ac4feaf 100644 --- a/src/policyengine/data/release_manifests/us.json +++ b/src/policyengine/data/release_manifests/us.json @@ -5,63 +5,63 @@ "certified_by": "populace-data release manifest", "certified_for_model_version": "1.729.0", "compatibility_basis": "exact_build_model_version", - "data_build_id": "populace-us-2024-a912aea-76666318a202-20260616T175345Z" + "data_build_id": "populace-us-2024-incumbent-improved-996401a-20260618" }, "certified_data_artifact": { - "build_id": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "build_id": "populace-us-2024-incumbent-improved-996401a-20260618", "data_package": { "name": "populace-data", "version": "0.1.0" }, "dataset": "populace_us_2024", - "sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92", - "uri": "hf://policyengine/populace-us/populace_us_2024.h5@populace-us-2024-a912aea-76666318a202-20260616T175345Z" + "sha256": "068c1d5fb9d731c425c5021f1eb67783d957bafd0618ec7584edd32c66dce7d4", + "uri": "hf://policyengine/populace-us/populace_us_2024.h5@c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36" }, "country_id": "us", "data_package": { "name": "populace-data", - "release_manifest_path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/release_manifest.json", - "release_manifest_revision": "c4e2fd454ddce0e1889ab77abff178a7bdd72b18", + "release_manifest_path": "releases/populace-us-2024-incumbent-improved-996401a-20260618/release_manifest.json", + "release_manifest_revision": "c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36", "repo_id": "policyengine/populace-us", "repo_type": "dataset", "version": "0.1.0" }, "datasets": { "calibration_diagnostics": { - "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/calibration_diagnostics.json", + "path": "releases/populace-us-2024-incumbent-improved-996401a-20260618/calibration_diagnostics.json", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", - "sha256": "154a1b217211d92c50e0fb84750888920cf8a63afcf9437efa85e484a7d501c9" + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", + "sha256": "554babf39002ec708bff31d98709fdc38b88cbcc7055f69cd37ddecdd1c46b87" }, "populace_us_2024": { "path": "populace_us_2024.h5", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", - "sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92" + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", + "sha256": "068c1d5fb9d731c425c5021f1eb67783d957bafd0618ec7584edd32c66dce7d4" }, "populace_us_2024_calibration": { "path": "populace_us_2024_calibration.npz", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", - "sha256": "0679dd35dbb198164beee6d56626af2b2fb57d6a3b6ea6511daf908e66296175" + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", + "sha256": "9ff5a0084defc9ee583742fbe0778fd1bdf610d952d329df2963552c1bfb7e6a" }, "us_source_coverage": { - "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/us_source_coverage.json", + "path": "releases/populace-us-2024-incumbent-improved-996401a-20260618/us_source_coverage.json", "repo_id": "policyengine/populace-us", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", - "sha256": "233a87ccc1c1eb8ed95321b7ebe586cd483e4e5af37686e182803f3b88edc76d" + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", + "sha256": "3c47a65b295fe2e5d8504384c12d682c5b54a3ff48a9e1ac617d6c2adda24795" }, "reform_validation": { - "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/reform_validation.json", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "path": "releases/populace-us-2024-incumbent-improved-996401a-20260618/reform_validation.json", + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", "repo_id": "policyengine/populace-us", - "sha256": "266851a23595eb832fdc3a88453fd60dfc12cd258e557b4ae7fd92b19eeb4f9e" + "sha256": "bb3a8ecca861c23299af1af027502afa49a65152a4e44c1e1d93cc9ea5c1688b" }, "demographics": { - "path": "releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/demographics.json", - "revision": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", + "path": "releases/populace-us-2024-incumbent-improved-996401a-20260618/demographics.json", + "revision": "populace-us-2024-incumbent-improved-996401a-20260618", "repo_id": "policyengine/populace-us", - "sha256": "073b9edf5a0594ab9e24c8aaae2a981a09ae510869a3f6c6cec3bb5500914897" + "sha256": "831ca1261e7c8ad7a96e477960a1482cecb79123a3714f0d144a3f1256275b8f" } }, "default_dataset": "populace_us_2024", diff --git a/src/policyengine/data/release_manifests/us.trace.tro.jsonld b/src/policyengine/data/release_manifests/us.trace.tro.jsonld index 5f6b4b57..f8bb524d 100644 --- a/src/policyengine/data/release_manifests/us.trace.tro.jsonld +++ b/src/policyengine/data/release_manifests/us.trace.tro.jsonld @@ -17,7 +17,7 @@ "schema:name": "PolicyEngine", "schema:url": "https://policyengine.org" }, - "schema:dateCreated": "2026-06-16T18:48:19.511357+00:00", + "schema:dateCreated": "2026-06-18T14:48:31.853398+00:00", "schema:description": "TRACE TRO for certified runtime bundle us-4.17.7 covering the bundle manifest, the certified dataset artifact, the country model wheel, and the country data release manifest when it is available.", "schema:name": "policyengine us certified bundle TRO", "trov:createdWith": { @@ -45,7 +45,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/data_release_manifest" }, - "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/c4e2fd454ddce0e1889ab77abff178a7bdd72b18/releases/populace-us-2024-a912aea-76666318a202-20260616T175345Z/release_manifest.json" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36/releases/populace-us-2024-incumbent-improved-996401a-20260618/release_manifest.json" }, { "@id": "arrangement/1/location/dataset", @@ -53,7 +53,7 @@ "trov:hasArtifact": { "@id": "composition/1/artifact/dataset" }, - "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/populace-us-2024-a912aea-76666318a202-20260616T175345Z/populace_us_2024.h5" + "trov:hasLocation": "https://huggingface.co/datasets/policyengine/populace-us/resolve/c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36/populace_us_2024.h5" }, { "@id": "arrangement/1/location/model_wheel", @@ -75,21 +75,21 @@ "@type": "trov:ResearchArtifact", "schema:name": "policyengine.py bundle manifest for us", "trov:mimeType": "application/json", - "trov:sha256": "22d8245a4a53e4e1c2deb4cd5a54b098545e7c298dae8777283d0c2a449b0bc5" + "trov:sha256": "72ba4c9a09ccc9d90e0581701bcd8900e023a8b96d66f804c0c9ced0fb51986c" }, { "@id": "composition/1/artifact/data_release_manifest", "@type": "trov:ResearchArtifact", "schema:name": "populace-data release manifest 0.1.0", "trov:mimeType": "application/json", - "trov:sha256": "b50925148419218dec45800baa1c3c683b966a622a8cb281e49410b65c92d099" + "trov:sha256": "f95ecd0b08a0b457605bb30abaf56b900a735f2c5606c9b663f79019411e9f6a" }, { "@id": "composition/1/artifact/dataset", "@type": "trov:ResearchArtifact", "schema:name": "populace_us_2024", "trov:mimeType": "application/x-hdf5", - "trov:sha256": "9d87c7ff370be524e73aaf68d151b00846eefcae4b00a63760102e2c6f285f92" + "trov:sha256": "068c1d5fb9d731c425c5021f1eb67783d957bafd0618ec7584edd32c66dce7d4" }, { "@id": "composition/1/artifact/model_wheel", @@ -102,7 +102,7 @@ "trov:hasFingerprint": { "@id": "composition/1/fingerprint", "@type": "trov:CompositionFingerprint", - "trov:sha256": "f6fc1f6b54ae50ec474a6d6764b61140674a2070d4f3ccb0eb8e22af62c19936" + "trov:sha256": "39b143c9874a45cd5f2a165a6fc1e6292597d5916497729007abcb17ff54e4dc" } }, "trov:hasPerformance": { @@ -111,17 +111,14 @@ "pe:builtWithModelVersion": "1.729.0", "pe:certifiedBy": "populace-data release manifest", "pe:certifiedForModelVersion": "1.729.0", - "pe:ciGitRef": "refs/heads/main", - "pe:ciGitSha": "25d4a3d5e99dc1c415d077da9c0728bfebb16f16", - "pe:ciRunUrl": "https://github.com/PolicyEngine/policyengine.py/actions/runs/27683656210", "pe:compatibilityBasis": "exact_build_model_version", - "pe:dataBuildId": "populace-us-2024-a912aea-76666318a202-20260616T175345Z", - "pe:emittedIn": "github-actions", - "rdfs:comment": "Certification of build populace-us-2024-a912aea-76666318a202-20260616T175345Z for policyengine-us 1.729.0.", + "pe:dataBuildId": "populace-us-2024-incumbent-improved-996401a-20260618", + "pe:emittedIn": "local", + "rdfs:comment": "Certification of build populace-us-2024-incumbent-improved-996401a-20260618 for policyengine-us 1.729.0.", "trov:accessedArrangement": { "@id": "arrangement/1" }, - "trov:startedAtTime": "2026-06-16T18:48:19.511357+00:00", + "trov:startedAtTime": "2026-06-18T14:48:31.853398+00:00", "trov:wasConductedBy": { "@id": "trs" } diff --git a/src/policyengine/provenance/manifest.py b/src/policyengine/provenance/manifest.py index 97e8d21b..b9af72ee 100644 --- a/src/policyengine/provenance/manifest.py +++ b/src/policyengine/provenance/manifest.py @@ -173,6 +173,8 @@ class CountryReleaseManifest(BaseModel): region_datasets: dict[str, ArtifactPathTemplate] = Field(default_factory=dict) certified_data_artifact: Optional[CertifiedDataArtifact] = None certification: Optional[DataCertification] = None + source_sha256: Optional[str] = Field(default=None, exclude=True) + """Byte sha256 of the bundled manifest before Pydantic normalization.""" @property def default_dataset_uri(self) -> str: @@ -256,7 +258,10 @@ def get_release_manifest(country_id: str) -> CountryReleaseManifest: if not manifest_path.is_file(): raise ValueError(f"No bundled release manifest for country '{country_id}'") - return CountryReleaseManifest.model_validate_json(manifest_path.read_text()) + source_bytes = manifest_path.read_bytes() + manifest = CountryReleaseManifest.model_validate_json(source_bytes) + manifest.source_sha256 = hashlib.sha256(source_bytes).hexdigest() + return manifest @lru_cache diff --git a/src/policyengine/provenance/trace.py b/src/policyengine/provenance/trace.py index 8b919bdf..185c8228 100644 --- a/src/policyengine/provenance/trace.py +++ b/src/policyengine/provenance/trace.py @@ -361,9 +361,12 @@ def build_trace_tro_from_release_bundle( repo_type=country_manifest.data_package.repo_type, ) - bundle_manifest_hash = hashlib.sha256( - canonical_json_bytes(country_manifest.model_dump(mode="json")) - ).hexdigest() + bundle_manifest_hash = ( + country_manifest.source_sha256 + or hashlib.sha256( + canonical_json_bytes(country_manifest.model_dump(mode="json")) + ).hexdigest() + ) data_release_manifest_hash = ( data_release_manifest.source_sha256 or hashlib.sha256( diff --git a/tests/test_certify_data_release.py b/tests/test_certify_data_release.py index b9e26e40..0b378d06 100644 --- a/tests/test_certify_data_release.py +++ b/tests/test_certify_data_release.py @@ -365,18 +365,13 @@ def test__given_unreachable_vendored_artifact__then_raises(self, tmp_path): class TestVendoredSidecarBinding: def test__given_vendored_us_manifest__then_tro_sidecar_binds_it(self): """The shipped TRO must bind the shipped country manifest under the - canonical-model convention used by build_trace_tro_from_release_bundle.""" + same byte-hash convention used by trace-tro-verify.""" import hashlib from importlib.resources import files - from policyengine.provenance.manifest import CountryReleaseManifest - from policyengine.provenance.trace import canonical_json_bytes - manifest_dir = files("policyengine").joinpath("data/release_manifests") - manifest_text = manifest_dir.joinpath("us.json").read_text() - country_manifest = CountryReleaseManifest.model_validate_json(manifest_text) expected = hashlib.sha256( - canonical_json_bytes(country_manifest.model_dump(mode="json")) + manifest_dir.joinpath("us.json").read_bytes() ).hexdigest() tro = json.loads(manifest_dir.joinpath("us.trace.tro.jsonld").read_text()) diff --git a/tests/test_models.py b/tests/test_models.py index a302ec08..c586e4f5 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -119,7 +119,7 @@ def test_has_release_manifest_metadata(self): assert ( us_latest.default_dataset_uri == "hf://policyengine/populace-us/populace_us_2024.h5" - "@populace-us-2024-a912aea-76666318a202-20260616T175345Z" + "@c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36" ) def test_has_hundreds_of_parameters(self): diff --git a/tests/test_release_manifests.py b/tests/test_release_manifests.py index 0656e635..ea395d54 100644 --- a/tests/test_release_manifests.py +++ b/tests/test_release_manifests.py @@ -44,8 +44,8 @@ US_MODEL_VERSION = "1.729.0" US_BUILT_WITH_MODEL_VERSION = "1.729.0" US_DATA_RELEASE_VERSION = "0.1.0" -US_DATA_RELEASE_ID = "populace-us-2024-a912aea-76666318a202-20260616T175345Z" -US_DATA_RELEASE_REVISION = "c4e2fd454ddce0e1889ab77abff178a7bdd72b18" +US_DATA_RELEASE_ID = "populace-us-2024-incumbent-improved-996401a-20260618" +US_DATA_RELEASE_REVISION = "c5a9f565aed7eb15e04dbabe0aff563b3b9a3a36" US_DATA_RELEASE_PATH = f"releases/{US_DATA_RELEASE_ID}/release_manifest.json" US_DATA_ARTIFACT_REVISION = US_DATA_RELEASE_ID US_CERTIFICATION_SOURCE = "populace-data release manifest" @@ -53,7 +53,7 @@ f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_ARTIFACT_REVISION}" ) US_CERTIFIED_DATASET_URI = ( - f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_ARTIFACT_REVISION}" + f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}" ) US_RELEASE_MANIFEST_DATASET_URI = ( f"hf://policyengine/populace-us/populace_us_2024.h5@{US_DATA_RELEASE_REVISION}"