diff --git a/.github/scripts/test_ci_workflow.py b/.github/scripts/test_ci_workflow.py index 890f52a..2676196 100644 --- a/.github/scripts/test_ci_workflow.py +++ b/.github/scripts/test_ci_workflow.py @@ -47,6 +47,7 @@ def test_ci_workflow_guard_is_run_by_ci(self) -> None: text = workflow_text() self.assertIn("python3 .github/scripts/test_ci_workflow.py", text) + self.assertIn("python3 .github/scripts/test_milestone_b_internal_checks.py", text) if __name__ == "__main__": diff --git a/.github/scripts/test_milestone_b_internal_checks.py b/.github/scripts/test_milestone_b_internal_checks.py new file mode 100644 index 0000000..eaab149 --- /dev/null +++ b/.github/scripts/test_milestone_b_internal_checks.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# +# Copyright 2026 The Ethos maintainers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#

from __future__ import annotations

import unittest
from pathlib import Path


# This script lives in .github/scripts/, so two levels above its directory is
# the repository root that holds the Makefile.
ROOT = Path(__file__).resolve().parents[2]
MAKEFILE = ROOT / "Makefile"
TARGET = "milestone-b-internal-checks"

# Star-importing this module exposes only the Makefile helpers; the TestCase
# is left out so a runner that star-imports the helpers does not collect the
# guard tests a second time.
__all__ = [
    "MAKEFILE",
    "ROOT",
    "TARGET",
    "makefile_text",
    "phony_targets",
    "target_block",
]


def makefile_text() -> str:
    """Return the repository Makefile's contents, decoded as UTF-8."""
    return MAKEFILE.read_text(encoding="utf-8")


def phony_targets(text: str | None = None) -> set[str]:
    """Return every target name listed on a ``.PHONY:`` declaration.

    Args:
        text: Makefile source to inspect. Defaults to the repository Makefile.

    Returns:
        The set of names found after ``.PHONY:`` across all such lines.
        Backslash-continued declarations are joined before splitting, and a
        trailing ``#`` comment on the declaration is ignored.
    """
    if text is None:
        text = makefile_text()

    prefix = ".PHONY:"
    declared: set[str] = set()
    pending = ""
    for raw in text.splitlines():
        line = pending + raw
        if line.endswith("\\"):
            # Backslash-newline continues the declaration on the next line.
            pending = line[:-1] + " "
            continue
        pending = ""
        if line.startswith(prefix):
            names = line[len(prefix):].partition("#")[0]
            declared.update(names.split())
    return declared


def target_block(target: str, text: str | None = None) -> str:
    """Return the recipe lines that follow the ``<target>:`` rule line.

    Args:
        target: Target name; the rule line must be exactly ``<target>:``.
        text: Makefile source to inspect. Defaults to the repository Makefile.

    Returns:
        The lines after the rule line, joined with newlines, up to (but not
        including) the first non-empty line that does not start with a tab or
        a space. Blank lines inside the recipe are kept.

    Raises:
        AssertionError: If no line equals ``<target>:``.
    """
    if text is None:
        text = makefile_text()

    lines = text.splitlines()
    start = None
    for index, line in enumerate(lines):
        if line == f"{target}:":
            start = index + 1
            break
    if start is None:
        raise AssertionError(f"{target} target is missing")

    block: list[str] = []
    for line in lines[start:]:
        # Any non-empty, unindented line starts the next Makefile construct.
        if line and not line.startswith(("\t", " ")):
            break
        block.append(line)
    return "\n".join(block)


class MilestoneBInternalCheckTests(unittest.TestCase):
    """Static guards over the ``milestone-b-internal-checks`` Makefile target."""

    # Commands the target's recipe must contain.
    REQUIRED_COMMANDS: tuple[str, ...] = (
        "$(PYTHON) fixtures/validate_fixtures.py",
        "$(PYTHON) schemas/test_font_policy_validation.py",
        "$(MAKE) verify-alpha PYTHON=$(PYTHON)",
        "$(MAKE) layout-evaluator-alpha PYTHON=$(PYTHON)",
        "$(MAKE) python-surface-test PYTHON=$(PYTHON)",
        "$(PYTHON) .github/scripts/claims_gate.py",
        "$(PYTHON) .github/scripts/readiness_gate.py public",
        "git diff --check",
    )

    # Substrings that must never appear in the target's recipe.
    FORBIDDEN_SNIPPETS: tuple[str, ...] = (
        "release-",
        "third-party-license-manifest",
        "release-notice-draft",
    )

    def test_target_is_declared_phony(self) -> None:
        """The target name must appear on a ``.PHONY:`` declaration.

        Checking for ``.PHONY:`` and the target name independently would pass
        whenever the rule exists, because the name is always present on its
        own rule line.
        """
        self.assertIn(TARGET, phony_targets())

    def test_target_composes_current_internal_gates(self) -> None:
        """Every required command must be present in the target's recipe."""
        block = target_block(TARGET)

        for command in self.REQUIRED_COMMANDS:
            # subTest reports every missing command, not only the first one.
            with self.subTest(command=command):
                self.assertIn(command, block)

    def test_target_stays_internal_only(self) -> None:
        """The recipe must not reference any of the forbidden snippets."""
        block = target_block(TARGET)

        for snippet in self.FORBIDDEN_SNIPPETS:
            with self.subTest(snippet=snippet):
                self.assertNotIn(snippet, block)


if __name__ == "__main__":
    unittest.main()
.github/scripts/test_determinism_workflow.py - name: CI workflow tests run: python3 .github/scripts/test_ci_workflow.py + - name: Milestone B internal check target tests + run: python3 .github/scripts/test_milestone_b_internal_checks.py - name: Gate Zero harness tests run: python3 benchmarks/harness/test_run_gate_zero.py - name: same-platform double-parse byte-diff diff --git a/Makefile b/Makefile index 9f25686..c8d109e 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ python-surface-test: milestone-b-internal-checks: $(PYTHON) fixtures/validate_fixtures.py + $(PYTHON) schemas/test_font_policy_validation.py $(MAKE) verify-alpha PYTHON=$(PYTHON) $(MAKE) layout-evaluator-alpha PYTHON=$(PYTHON) $(MAKE) python-surface-test PYTHON=$(PYTHON) diff --git a/docs/execution-status.md b/docs/execution-status.md index 8d670e7..5e3e063 100644 --- a/docs/execution-status.md +++ b/docs/execution-status.md @@ -20,7 +20,7 @@ The committed implementation now includes: - Schema/example/profile validation is green through `schemas/validate_examples.py` using `jsonschema` draft 2020-12 validation, including the crop descriptor artifact contract plus referential-integrity and bbox sanity checks outside JSON Schema. Fixture validation also binds internal font-isolation PDFs to committed manifest hashes. - `ethos verify` now produces non-empty quote, value, presence, and table-cell verification checks over native Ethos document JSON and synthetic OpenDataLoader-style JSON through `--grounding opendataloader-json`; it also verifies quote/value/presence citations over pinned real OpenDataLoader 2.4.7 JSON, including grounded and ungrounded cases, maps explicit real OpenDataLoader-style row/cell structures to table-cell grounding, and normalizes conservative real-style text/child-container aliases when page/bbox/text data remains explicit. Citation/config inputs are rejected when they drift outside the closed schemas. 
The public demo harness covers grounded, ungrounded, split-quote, not-found, stale-fingerprint, unsupported non-v1 claim, capability-limited, malformed-citation, malformed OpenDataLoader-style input, and summary-format reject paths. - Verification semantics are now trust-honest at alpha scope: quote containment is explicitly labeled, value/table-cell checks require normalized equality, fingerprint-pinned citations fail closed when source fingerprints are unavailable, and structured capability limits explain why a run is downgraded. -- `make verify-alpha` is the current alpha trust-loop command: it checks native examples, split-quote evidence matching, unsupported non-v1 claim reporting, synthetic OpenDataLoader-style examples, pinned real OpenDataLoader grounded/ungrounded examples, schema validation, verify-alpha case inventory coverage, usage diagnostics for malformed citations and malformed OpenDataLoader-style structures, byte-identical repeated verification reports, byte-identical native crop descriptors, summary diagnostics for an ungrounded native case, and foreign fixture manifest hash binding. `make milestone-b-internal-checks` composes the current internal Milestone B validation path across fixture validation, verify alpha, layout evaluator, Python surface tests, and policy gates. +- `make verify-alpha` is the current alpha trust-loop command: it checks native examples, split-quote evidence matching, unsupported non-v1 claim reporting, synthetic OpenDataLoader-style examples, pinned real OpenDataLoader grounded/ungrounded examples, schema validation, verify-alpha case inventory coverage, usage diagnostics for malformed citations and malformed OpenDataLoader-style structures, byte-identical repeated verification reports, byte-identical native crop descriptors, summary diagnostics for an ungrounded native case, and foreign fixture manifest hash binding. 
`make milestone-b-internal-checks` composes the current internal Milestone B validation path across fixture validation, font-policy profile checks, verify alpha, layout evaluator, Python surface tests, and policy gates; CI has a static guard for that target's command wiring. - An internal Python surface scaffold exists under `python/ethos_pdf`. It shells out to a caller-provided local `ethos` CLI binary for `ethos doc parse` JSON, Markdown, and text output, and has stdlib unit tests that use a fake local command. This is pre-alpha scaffolding for Milestone B API shape work, not a public installation or publication path. - Native Ethos verification can emit deterministic, schema-backed crop descriptor JSON artifacts through `--crop-dir`; these bind `document_fingerprint`, page, bbox, and check ids. Native `crop_ref` filenames are logical evidence references derived from document fingerprint, check id, and page, while descriptors still record the exact observed bbox. When `--crop-source-pdf` is supplied, the CLI validates source-PDF fingerprint binding and emits PNG crop artifacts whose filenames, byte hashes, dimensions, and source fingerprint are bound from the descriptor. `make verify-rendered-crops` checks same-host repeated-run stability for the rendered artifact path, and `make compare-rendered-crops` classifies two rendered-crop runs by separating logical evidence identity from rendered artifact byte equality. Cross-platform rendered image determinism is not claimed; the 2026-06-14 macOS arm64 vs Linux x64 validation record in `docs/validation/rendered-crops-2026-06-14.md` preserved document fingerprint and `payload_sha256` but failed rendered artifact byte equality because the evidence bbox differed slightly across platforms.