diff --git a/.github/scripts/test_execution_status.py b/.github/scripts/test_execution_status.py index 221b897..a2b5110 100644 --- a/.github/scripts/test_execution_status.py +++ b/.github/scripts/test_execution_status.py @@ -30,13 +30,15 @@ def status_text() -> str: class ExecutionStatusTests(unittest.TestCase): - def test_status_is_scoped_to_internal_closeout(self) -> None: + def test_status_is_scoped_to_internal_continuation(self) -> None: text = status_text() self.assertIn( - "Status: Pre-alpha / internal Milestone C artifact-validation closeout.", + "Status: Pre-alpha / internal transition from Milestone C artifact-validation " + "closeout to Milestone D source-only contract work.", text, ) + self.assertIn("docs/milestone-d-verify-citations-contract.md", text) self.assertNotIn("Status: Pre-alpha / Milestone B entry.", text) def test_internal_check_command_is_documented(self) -> None: @@ -44,13 +46,15 @@ def test_internal_check_command_is_documented(self) -> None: self.assertIn("make milestone-b-internal-checks", text) self.assertIn("make milestone-c-internal-checks", text) + self.assertIn("make milestone-d-verify-citations-contract", text) self.assertIn("CI has a static guard for that target's command wiring", text) def test_public_posture_boundary_remains_explicit(self) -> None: text = status_text() self.assertIn( - 'Public language stays at "pre-alpha / internal Milestone C artifact-validation closeout"', + 'Public language stays at "source-only pre-alpha / internal Milestone C closeout ' + 'and Milestone D contract continuation"', text, ) self.assertIn("claim audit approves specific wording", text) diff --git a/.github/scripts/test_milestone_d_verify_citations_contract.py b/.github/scripts/test_milestone_d_verify_citations_contract.py new file mode 100644 index 0000000..cb9667d --- /dev/null +++ b/.github/scripts/test_milestone_d_verify_citations_contract.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +# +# Copyright 2026 The Ethos maintainers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations + +import re +import unittest +from pathlib import Path + +from makefile_guard import makefile_text, target_block + + +ROOT = Path(__file__).resolve().parents[2] +CONTRACT = ROOT / "docs/milestone-d-verify-citations-contract.md" +ROADMAP = ROOT / "docs/roadmap.md" +EXECUTION_STATUS = ROOT / "docs/execution-status.md" +SCHEMAS_README = ROOT / "schemas/README.md" + + +def contract_text() -> str: + return CONTRACT.read_text(encoding="utf-8") + + +def normalized_contract_text() -> str: + return re.sub(r"\s+", " ", contract_text()) + + +class MilestoneDVerifyCitationsContractTests(unittest.TestCase): + def test_target_is_declared_phony(self) -> None: + text = makefile_text() + + self.assertIn(".PHONY:", text) + self.assertIn("milestone-d-verify-citations-contract", text) + + def test_target_composes_contract_gates(self) -> None: + block = target_block("milestone-d-verify-citations-contract") + + required = [ + "cargo test --locked -p ethos-cli --test verify", + "$(PYTHON) schemas/validate_examples.py", + "$(PYTHON) .github/scripts/test_execution_status.py", + "$(PYTHON) .github/scripts/test_roadmap_status.py", + "$(PYTHON) .github/scripts/test_milestone_d_verify_citations_contract.py", + "git diff --check", + ] + for command in required: + self.assertIn(command, block) + + def test_target_stays_contract_scoped(self) -> None: + block = target_block("milestone-d-verify-citations-contract") + + for out_of_scope in [ + "verify-alpha", + "rag-chunk-alpha", + "security-report-alpha", + "verify-rendered-crops", + "compare-rendered-crops", + "layout-evaluator-alpha", + "python-surface-test", + "release-", + "third-party-license-manifest", + ]: + self.assertNotIn(out_of_scope, block) + + def test_contract_is_linked_from_status_docs(self) -> None: + for path in [ROADMAP, EXECUTION_STATUS, SCHEMAS_README]: + text = path.read_text(encoding="utf-8") + self.assertIn("milestone-d-verify-citations-contract.md", text, path) + + def test_contract_defines_existing_carrier_not_new_surface(self) -> None: + text = normalized_contract_text() + + self.assertIn("source-only pre-alpha contract work", text) + self.assertIn("The current executable carrier remains `ethos verify`", text) + self.assertIn("does not create a new public command, binding, or hosted surface", text) + self.assertIn( + "`verify_citations` names the contract between citation input, grounding source, " + "verification config, and verification report", + text, + ) + + def test_contract_pins_v1_supported_and_blocked_scope(self) -> None: + text = normalized_contract_text() + + for kind in ["`quote`", "`value`", "`presence`", "`table_cell`"]: + self.assertIn(kind, text) + self.assertIn("`region` and `other` remain explicit unsupported non-v1 inputs", text) + + for blocker in [ + "a new `verify_citations` CLI alias", + "Python, Node, MCP, or hosted API surfaces", + "crop API implementation", + "sandbox/subprocess backend expansion", + "semantic or arithmetic verification", + ]: + self.assertIn(blocker, text) + + def test_contract_names_fixture_backed_validation(self) -> None: + text = normalized_contract_text() + + self.assertIn("`schemas/examples/citations.example.json`", text) + self.assertIn("`schemas/examples/verification-report.example.json`", text) + self.assertIn("echoes the example claims in input order", text) + self.assertIn("`all_evidence_grounded` is true only under the invariant", text) + self.assertIn( + "`make milestone-d-verify-citations-contract PYTHON=/bin/python`", + text, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile b/Makefile index ffb801e..5501667 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ COMPARE_RENDERED_CROPS_LEFT ?= $(VERIFY_RENDERED_CROPS_OUT)/run1 COMPARE_RENDERED_CROPS_RIGHT ?= $(VERIFY_RENDERED_CROPS_OUT)/run2 LAYOUT_EVALUATOR_OUT ?= $(ROOT)/target/layout-evaluator-alpha -.PHONY: verify-alpha verify-alpha-tree rag-chunk-alpha security-report-alpha verify-rendered-crops compare-rendered-crops layout-evaluator-alpha python-surface-test milestone-b-internal-checks milestone-c-internal-checks release-hygiene release-advisory third-party-license-manifest release-notice-draft +.PHONY: verify-alpha verify-alpha-tree rag-chunk-alpha security-report-alpha milestone-d-verify-citations-contract verify-rendered-crops compare-rendered-crops layout-evaluator-alpha python-surface-test milestone-b-internal-checks milestone-c-internal-checks release-hygiene release-advisory third-party-license-manifest release-notice-draft $(ETHOS_BIN): cargo build --locked -p ethos-cli @@ -47,6 +47,14 @@ security-report-alpha: $(PYTHON) .github/scripts/test_security_report_alpha.py git diff --check +milestone-d-verify-citations-contract: + cargo test --locked -p ethos-cli --test verify + $(PYTHON) schemas/validate_examples.py + $(PYTHON) .github/scripts/test_execution_status.py + $(PYTHON) .github/scripts/test_roadmap_status.py + $(PYTHON) .github/scripts/test_milestone_d_verify_citations_contract.py + git diff --check + verify-rendered-crops: $(ETHOS_BIN) $(PYTHON) examples/verify/check_rendered_crops.py --repo-root $(ROOT) --ethos-bin $(ETHOS_BIN) --out-dir $(VERIFY_RENDERED_CROPS_OUT) git diff --check diff --git a/docs/execution-status.md b/docs/execution-status.md index 587ea2c..0d6a405 100644 --- a/docs/execution-status.md +++ b/docs/execution-status.md @@ -2,7 +2,7 @@ Date: 2026-06-18 Owner: product / decider -Status: Pre-alpha / internal Milestone C artifact-validation closeout. Week 0 governance is accepted, WS-ENGINE Phase 1 has a real narrow PDFium path, WS-VERIFY-ALPHA has real deterministic evidence checks over native Ethos JSON and pinned OpenDataLoader output, WS-HARNESS has fail-closed readiness scaffolding, the Gate Zero corpus/hardware manifest and direct competitor lock are frozen/signed, ADR-0005 records an accepted `PROCEED` decision for internal Milestone B continuation, ADR-0006 closes package identifier/trademark validation, ADR-0007 locks the product direction, and the public-source preflight is green for a source-only pre-alpha GitHub push. Current Milestone C work has a source-tree internal artifact-validation closeout for the RAG chunk and security-report trust-loop checks. Public benchmark reports, releases, packages, production positioning, and all performance/quality/footprint claims remain blocked. The controlled-run handoff remains `docs/gate-zero-evidence-runbook.md`; the accepted decision record is `docs/decisions/ADR-0005-gate-zero-decision.md`. +Status: Pre-alpha / internal transition from Milestone C artifact-validation closeout to Milestone D source-only contract work. Week 0 governance is accepted, WS-ENGINE Phase 1 has a real narrow PDFium path, WS-VERIFY-ALPHA has real deterministic evidence checks over native Ethos JSON and pinned OpenDataLoader output, WS-HARNESS has fail-closed readiness scaffolding, the Gate Zero corpus/hardware manifest and direct competitor lock are frozen/signed, ADR-0005 records an accepted `PROCEED` decision for internal Milestone B continuation, ADR-0006 closes package identifier/trademark validation, ADR-0007 locks the product direction, and the public-source preflight is green for a source-only pre-alpha GitHub push. Milestone C has a source-tree internal artifact-validation closeout for the RAG chunk and security-report trust-loop checks. Current Milestone D work begins with the narrow `verify_citations` v1 contract in `docs/milestone-d-verify-citations-contract.md`, carried by the existing `ethos verify` path and fixture-backed validation. Public benchmark reports, releases, packages, production positioning, and all performance/quality/footprint claims remain blocked. The controlled-run handoff remains `docs/gate-zero-evidence-runbook.md`; the accepted decision record is `docs/decisions/ADR-0005-gate-zero-decision.md`. ## Current Reality @@ -26,6 +26,7 @@ The committed implementation now includes: - `ethos rag chunk` has a committed-example artifact loop over `schemas/examples/document.example.json` and `schemas/examples/chunks.example.jsonl`. The current internal checks cover exact fixture/golden output, repeated-run byte identity, schema/example validation, stale page/element/bbox-page reference rejection, and default-chunk exclusion warning-reference rejection. - `ethos security report` has a source-only pre-alpha artifact check over the committed document example. The current internal checks cover deterministic report output, report/source identity grounding, security-warning lane and message diagnostics, locator grounding, inventory/report parity, summary drift, warning id uniqueness, deterministic warning numbering, and explicit rejection of unsupported current source-warning references. - `make milestone-c-internal-checks` composes the current internal Milestone C artifact-validation path across RAG chunk and security-report gates; CI/static guard scripts fail closed if that command wiring or the dated closeout record drifts. +- Milestone D source-only pre-alpha contract work has started with `docs/milestone-d-verify-citations-contract.md`. It defines `verify_citations` v1 as the citation-input, verification-config, grounding-source, and verification-report contract currently carried by `ethos verify`; schema/example validation checks that the minimal citation example and verification-report example stay coherent. Focused validation is `make milestone-d-verify-citations-contract PYTHON=/bin/python`. Still absent or not claimable: public benchmark reports, public competitor-comparison claims, public speed/quality/footprint claims, OCR/image-only support, real table extraction, mature list/heading/layout semantics beyond current fixture-backed alpha paths, semantic/arithmetic verification beyond deterministic evidence lookup, Phase 2 project-maintained PDFium builds, release packaging, and claim-audit approval for any public result wording. @@ -48,7 +49,7 @@ The corpus/hardware freeze and direct competitor pins are recorded in `benchmark ## Current Milestone Posture -Milestone A has an accepted internal Gate Zero decision for roadmap control, Milestone B is internally closed for the current source-tree validation scope, and Milestone C now has an internal artifact-validation closeout record. The product can demonstrate a narrow parser-backed grounding loop today, but the decision cannot be used as public benchmark credibility. +Milestone A has an accepted internal Gate Zero decision for roadmap control, Milestone B is internally closed for the current source-tree validation scope, and Milestone C now has an internal artifact-validation closeout record. The first Milestone D slice is contract work for `verify_citations` v1 over the existing source-tree trust loop. The product can demonstrate a narrow parser-backed grounding loop today, but the decision cannot be used as public benchmark credibility. | Work item | Current status | Remaining blocker | | --- | --- | --- | @@ -60,7 +61,7 @@ Milestone A has an accepted internal Gate Zero decision for roadmap control, Mil | Python surface scaffold | Landed: internal stdlib wrapper over a caller-provided local `ethos doc parse` command, with explicit JSON/Markdown/text methods, page selection passthrough, diagnostics passthrough, timeout handling, command failure reporting, and mocked-command unit coverage | Native binding work, broader API design, and public setup path remain future work | | Font policy groundwork | Partially landed: substitution table and profile policy are present; substitution-table bytes are pinned by the deterministic profile and checked by schema/example validation; absent bundled fallback assets must remain represented by a null fallback-bundle hash; fixture output uses deterministic substitution IDs, committed embedded-font fixture metadata now binds expected extraction font identity, document schema/font extraction keep emitted font ids inside the deterministic ASCII `embedded:` / `subst:` contract, and CLI font-isolation PDFs are manifest/hash-bound | Bundled fallback asset introduction/hash pinning and broader font/CID validation remain open | | Schema/example validation | Landed: schemas, examples, deterministic profile, referential integrity, and bbox sanity pass the `jsonschema` validation gate | Contract changes still require explicit versioning and compatibility review | -| Trust-layer implementation | Landed: `ethos verify` quote/value/presence/table-cell checks, explicit quote-containment labeling, normalized equality for value/table-cell checks, stale and unverifiable fingerprint handling, unsupported claim reporting, structured capability limits, native Ethos JSON path, ODL-style adapter path with synthetic table/cell mapping, explicit real ODL-style row/cell table grounding, conservative real-style text/child-container alias normalization, pinned real OpenDataLoader 2.4.7 grounded/ungrounded fixtures, foreign fixture manifest hash validation, crop-ref evidence plumbing, stable logical native crop refs, native crop descriptor artifacts, raw BGRA crop rendering in `ethos-pdf`, CLI PNG crop artifact production for bound native source PDFs, same-host rendered crop repeatability check, rendered-crop run comparison helper, strict citation/config input validation, citation input schema, split-quote fixture coverage, explicit unsupported non-v1 claim reporting, OpenDataLoader-style structure diagnostics for malformed bbox and unknown-page references, verify-alpha case inventory checks, and demo fixtures | Still needed: additional adapter hardening against broader real output shapes, future claim-kind expansion outside the current v1 alpha policy, and a decision on whether cross-platform rendered crop artifact equality is worth pursuing after the current macOS/Linux bbox drift finding | +| Trust-layer implementation | Landed: `ethos verify` quote/value/presence/table-cell checks, explicit quote-containment labeling, normalized equality for value/table-cell checks, stale and unverifiable fingerprint handling, unsupported claim reporting, structured capability limits, native Ethos JSON path, ODL-style adapter path with synthetic table/cell mapping, explicit real OpenDataLoader-style row/cell table grounding, conservative real-style text/child-container alias normalization, pinned real OpenDataLoader 2.4.7 grounded/ungrounded fixtures, foreign fixture manifest hash validation, crop-ref evidence plumbing, stable logical native crop refs, native crop descriptor artifacts, raw BGRA crop rendering in `ethos-pdf`, CLI PNG crop artifact production for bound native source PDFs, same-host rendered crop repeatability check, rendered-crop run comparison helper, strict citation/config input validation, citation input schema, split-quote fixture coverage, explicit unsupported non-v1 claim reporting, OpenDataLoader-style structure diagnostics for malformed bbox and unknown-page references, verify-alpha case inventory checks, demo fixtures, and a first Milestone D `verify_citations` v1 contract note that binds the current citation-input to verification-report contract without adding a new command or binding surface | Still needed: additional adapter hardening against broader real output shapes, future claim-kind expansion outside the current v1 alpha policy, first-class crop API work, sandbox/subprocess backend work, and a decision on whether cross-platform rendered crop artifact equality is worth pursuing after the current macOS/Linux bbox drift finding | | RAG chunk artifact checks | Landed for current source examples: deterministic command-level fixture/golden output, repeated-run byte identity, schema/example validation, stale reference rejection, and default-chunk exclusion warning-reference rejection | Broader chunk provenance/citation policy and future parser/table integration remain future work | | Security report artifact checks | Landed for current source examples: source-only `ethos security report`, deterministic output, source/report identity validation, security-warning locator grounding, warning-lane diagnostics, inventory/report parity, summary drift checks, and deterministic warning id checks | Broader security-report generation semantics, debug overlay integration, and future artifact UX remain future work | | WS-HARNESS readiness | Partially landed: readiness path is green for frozen corpus/hardware and pinned competitors, Gate Zero evidence preflight validates the current `ethos-bench` handoff, and gates fail closed if those records regress | Public-safe comparison report flow, release/package approval, claim-wording approval, and future evidence-refresh workflow still need hardening | @@ -68,4 +69,4 @@ Milestone A has an accepted internal Gate Zero decision for roadmap control, Mil ## PM Rule -Public language stays at "pre-alpha / internal Milestone C artifact-validation closeout" until the remaining external blockers are closed and the claim audit approves specific wording. Do not describe Ethos as benchmark-validated, release-ready, production-ready, or broadly parser-complete. Internal parser work should proceed only when it supports accepted evidence paths or the trust layer; the product-differentiating path remains verification and grounding first, with parser expansion serving that path. +Public language stays at "source-only pre-alpha / internal Milestone C closeout and Milestone D contract continuation" until the remaining external blockers are closed and the claim audit approves specific wording. Do not describe Ethos as benchmark-validated, release-ready, production-ready, or broadly parser-complete. Internal parser work should proceed only when it supports accepted evidence paths or the trust layer; the product-differentiating path remains verification and grounding first, with parser expansion serving that path. diff --git a/docs/milestone-d-verify-citations-contract.md b/docs/milestone-d-verify-citations-contract.md new file mode 100644 index 0000000..d269946 --- /dev/null +++ b/docs/milestone-d-verify-citations-contract.md @@ -0,0 +1,89 @@ +# Milestone D `verify_citations` v1 Contract + +Status: source-only pre-alpha contract work for internal Milestone D continuation. + +This note defines the narrow first Milestone D slice for `verify_citations` v1. It does not +create a new public command, binding, or hosted surface. The current executable carrier remains +`ethos verify`; `verify_citations` names the contract between citation input, grounding source, +verification config, and verification report. + +## Contract Surface + +`verify_citations` v1 consumes: + +- a grounding source: canonical Ethos document JSON, or a declared foreign adapter such as + `opendataloader-json`; +- citation input governed by `schemas/ethos-citations.schema.json`; +- the pinned default verification config, or an explicit config governed by + `schemas/ethos-verification-config.schema.json`. + +It emits `verification_report.json`, governed by +`schemas/ethos-verification-report.schema.json`. + +The example pair `schemas/examples/citations.example.json` and +`schemas/examples/verification-report.example.json` is the minimal source-tree fixture for this +contract. Schema validation now checks that the example report echoes the example claims in input +order and keeps the grounded gate coherent. + +Focused validation command: + +- `make milestone-d-verify-citations-contract PYTHON=/bin/python` + +The target runs verifier contract tests, schema/example validation, status/roadmap guards, this +contract guard, and diff hygiene. It intentionally stays narrower than prior milestone composite +checks. + +## Supported v1 Checks + +The v1 supported claim kinds are: + +- `quote` +- `value` +- `presence` +- `table_cell` + +`region` and `other` remain explicit unsupported non-v1 inputs. They must be reported as +unsupported instead of approximated. + +Evidence grounding is literal and diagnostic: + +- quote checks use exact or normalized containment; +- value and table-cell checks use exact or normalized equality / table-cell lookup; +- presence checks only confirm the cited target exists; +- stale fingerprints, missing locators, missing source capabilities, and missing targets fail + closed with stable check reasons; +- `all_evidence_grounded` is true only under the invariant documented in + `schemas/ethos-verification-report.schema.json` and implemented by + `compute_all_evidence_grounded`. + +`semantic_unverified` stays false for current literal checks. Work that needs paraphrase, +arithmetic, cross-region synthesis, or unmodeled evidence must not be silently treated as +grounded. + +## Capability And Crop Boundaries + +Grounding sources declare capabilities in the verification report. Missing spans, char offsets, +tables, fingerprints, coordinate origins, or crop support surface as capability limits and +diagnostics. + +Crop evidence remains bounded to existing verifier plumbing: + +- `crop_ref` can appear only when the active config requests crops and the grounding source + declares crop support; +- native crop descriptors and rendered crop artifacts remain verifier evidence artifacts; +- the broader first-class crop API is separate Milestone D work and is not part of this contract + slice. + +## Explicit Blockers For This Slice + +This first D slice does not add: + +- a new `verify_citations` CLI alias; +- Python, Node, MCP, or hosted API surfaces; +- broad foreign-adapter hardening beyond existing fixtures; +- crop API implementation; +- sandbox/subprocess backend expansion; +- semantic or arithmetic verification. + +Public-facing language remains limited to source-only pre-alpha internal continuation, evidence +grounding, diagnostics, fixture-backed validation, and explicit blockers. diff --git a/docs/roadmap.md b/docs/roadmap.md index 8a72b77..7cca555 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -16,13 +16,19 @@ external blockers. This closeout does not approve public benchmark reports, releases, packages, production positioning, or performance/quality/footprint claims. +Milestone D source-only pre-alpha contract work has started with the narrow +[`verify_citations` v1 contract](milestone-d-verify-citations-contract.md). The +current executable carrier remains `ethos verify`; the first D slice is a +contract and fixture-backed validation boundary, not a new public command, +binding, crop API, sandbox backend, Node beta, or MCP experimental scope. + | Milestone | Window | Contents | Gate | | --- | --- | --- | --- | | Week 0 | pre-kickoff | ADRs, governance, corpus freeze, CI bootstrap, competitor pins | All 11 rows done; clock starts | | A | weeks 1-8 | Contracts (5 schemas, c14n, deterministic profile), trust-boundary artifacts (`GroundingSource`, verification schemas, OpenDataLoader adapter stub, `ethos verify` CLI stub), PDFium Phase 1 spike, harness + competitor adapters, CLI skeleton | **Gate Zero**: ADR-0005 is accepted as `PROCEED` for internal Milestone B continuation. This is not public benchmark, release, package, production, or claim approval. | | B | weeks 9-14 | **`ethos verify` alpha first**: native Ethos JSON + synthetic and pinned real OpenDataLoader verification demos, stale fingerprint checks, capability-limited reports, deterministic evidence matching including split-quote coverage, explicit unsupported non-v1 claim reporting, adapter structure diagnostics; then reading order, blocks, headings, lists, Markdown/text exporters, Python wheel scaffold, quality dashboard, Windows x64 nightly determinism | [13-B exit checklist](milestone-b-exit-checklist.md) | | C | weeks 15-22 | Simple/bordered tables; RAG chunker + citations; non-text region coordinates; security report + default-chunk exclusion; debug overlay; internal benchmark snapshot | Current artifact-validation checkpoint recorded in [Milestone C closeout validation](validation/milestone-c-closeout-validation-2026-06-18.md); broader debug/crop/table follow-ups remain explicit | -| D | weeks 23-30 | `verify_citations` v1; crop API; sandbox/subprocess backend; Node beta and MCP experimental only if staffed or accepted by release-scope ADR | 13-D exit | +| D | weeks 23-30 | [`verify_citations` v1](milestone-d-verify-citations-contract.md); crop API; sandbox/subprocess backend; Node beta and MCP experimental only if staffed or accepted by release-scope ADR | 13-D exit | | E | weeks 31-40 | Public benchmark report (reproducible, labeled tiers); PDFium Phase 2 project-maintained builds; stable CLI/Python docs; proof-of-trust demos; **Public Beta** | Release 1 claim audit + public-beta checkpoint | | F / Release 2 | post-E | Complex tables, formula/LaTeX, chart classification, optional enrichment modules (never base) | Scoped after E from beta fixtures | diff --git a/schemas/README.md b/schemas/README.md index 3fdf9f6..925c525 100644 --- a/schemas/README.md +++ b/schemas/README.md @@ -29,5 +29,10 @@ security-report / verification-report examples). `verification-report-negative.example.json` shows a non-grounded report with a per-check `reason` label. +Milestone D `verify_citations` v1 contract work is tracked in +`docs/milestone-d-verify-citations-contract.md`. In this source-only pre-alpha slice, +`verify_citations` names the citation-input to verification-report contract currently carried by +`ethos verify`; it does not add a new command or binding surface. + Derived artifacts not schema'd here: `document.md` / `document.txt` (deterministic exports specified by the exporter config, Milestone B) and `debug.html` (Milestone C). diff --git a/schemas/validate_examples.py b/schemas/validate_examples.py index da2b287..3a6f85c 100644 --- a/schemas/validate_examples.py +++ b/schemas/validate_examples.py @@ -320,6 +320,7 @@ def c14n_value(v) -> str: sec = actual_security_report sec_full = json.loads((EXAMPLES / "security-report.full.example.json").read_text(encoding="utf-8")) ver = json.loads((EXAMPLES / "verification-report.example.json").read_text(encoding="utf-8")) +citations = json.loads((EXAMPLES / "citations.example.json").read_text(encoding="utf-8")) crop = json.loads((EXAMPLES / "crop-descriptor.example.json").read_text(encoding="utf-8")) for label, got in [ ("security-report.document_fingerprint", sec["document_fingerprint"]), @@ -348,6 +349,41 @@ def c14n_value(v) -> str: else: print("ok security report examples findings are grounded in document example") +# Milestone D verify_citations v1 contract fixture: the minimal citation input example and +# grounded report example must describe the same ordered claims over the document example. +verify_citations_failures_before = failures +if citations["document_fingerprint"] != doc["fingerprint"]: + fail("citations.example.json document_fingerprint diverges from document example") +if ver["document_fingerprint"] != citations["document_fingerprint"]: + fail("verification-report.example.json document_fingerprint diverges from citations example") +claims = citations["claims"] +checks = ver["checks"] +if len(checks) != len(claims): + fail( + "verification-report.example.json checks do not match citations.example.json claims " + f"({len(checks)} checks vs {len(claims)} claims)" + ) +else: + for index, (claim, check) in enumerate(zip(claims, checks), 1): + expected_id = f"v{index:04}" + if check["id"] != expected_id: + fail(f"verification-report.example.json check {index} id is not {expected_id}") + if c14n_value(check["claim"]) != c14n_value(claim): + fail( + "verification-report.example.json check " + f"{check['id']} does not echo citations.example.json claim {index}" + ) + if check["status"] != "grounded": + fail(f"verification-report.example.json check {check['id']} is not grounded") + if check["semantic_unverified"]: + fail(f"verification-report.example.json check {check['id']} is semantically unverified") +if not ver["all_evidence_grounded"]: + fail("verification-report.example.json all_evidence_grounded is not true") +if ver["unsupported_claim_kinds"]: + fail("verification-report.example.json unexpectedly has unsupported claim kinds") +if failures == verify_citations_failures_before: + print("ok verify_citations v1 example pair coherent") + # deterministic profile font-policy artifact checks profile = json.loads( (ROOT / "profiles" / "ethos-deterministic-v1.json").read_text(encoding="utf-8")