From 9dc2240a7f8f819f1a283365264f1287cf0d96ae Mon Sep 17 00:00:00 2001 From: docushell-admin Date: Tue, 16 Jun 2026 23:25:44 +0530 Subject: [PATCH] Validate fixture span expectation metadata Signed-off-by: docushell-admin --- docs/execution-status.md | 4 +- fixtures/README.md | 4 + fixtures/evaluate_layout_alpha.py | 135 ++++++++++++++++++++++++- fixtures/test_evaluate_layout_alpha.py | 55 ++++++++++ fixtures/validate_fixtures.py | 53 ++++++++++ 5 files changed, 248 insertions(+), 3 deletions(-) diff --git a/docs/execution-status.md b/docs/execution-status.md index 7cb3630..3aff4e6 100644 --- a/docs/execution-status.md +++ b/docs/execution-status.md @@ -16,7 +16,7 @@ The committed implementation now includes: - The determinism workflow includes a Windows x64 preflight lane for core c14n/profile/fingerprint contract tests, while PDFium-backed corpus work remains explicitly skipped unless the pinned runtime is configured on that runner. A static workflow test guards that matrix wiring. - `ethos doc parse` / `ethos fingerprint` PDF execution through a worker process with `max_parse_ms` timeout enforcement, stable error-envelope relay, diagnostics-gated worker stderr, and page-range validation/filtering. - Quantized page/span extraction at the backend boundary, plus a basic deterministic layout pass that assembles paragraph `text_block` elements, fixture-backed alpha heading and flat list-item elements, and simple column reading order for the current born-digital fixtures. Current alpha layout confidence is explicit for heading signals, and below-threshold layout confidence emits deterministic `low_confidence_reading_order` diagnostics instead of staying silent. Fixture validation binds selected `fixture.json` expectations to committed extraction/layout goldens and binds current alpha text/Markdown exports to committed layout output so current read-order, element-type, heading-export, list-item, and export cases fail closed on drift. -- An internal layout evaluator scaffold exists at `fixtures/evaluate_layout_alpha.py` and `make layout-evaluator-alpha`. It reads committed `fixture.json`, `extraction.json`, `layout.json`, `text.txt`, and `markdown.md` files, summarizes alpha element-type and subset coverage, and fails closed on missing layout expectations, dangling/invalid warning references, confidence-policy drift, export-golden drift, expected page/span-text/font-id drift, expected rotation drift, or drift in fixture-backed reading order / heading / list-item / hyphenation / ligature cases. +- An internal layout evaluator scaffold exists at `fixtures/evaluate_layout_alpha.py` and `make layout-evaluator-alpha`. It reads committed `fixture.json`, `extraction.json`, `layout.json`, `text.txt`, and `markdown.md` files, summarizes alpha element-type and subset coverage, and fails closed on missing layout expectations, dangling/invalid warning references, confidence-policy drift, export-golden drift, invalid span expectation metadata, expected page/span-text/font-id drift, expected rotation drift, or drift in fixture-backed reading order / heading / list-item / hyphenation / ligature cases. - Schema/example/profile validation is green through `schemas/validate_examples.py` using `jsonschema` draft 2020-12 validation, including the crop descriptor artifact contract plus referential-integrity and bbox sanity checks outside JSON Schema. - `ethos verify` now produces non-empty quote, value, presence, and table-cell verification checks over native Ethos document JSON and synthetic OpenDataLoader-style JSON through `--grounding opendataloader-json`; it also verifies quote/value/presence citations over pinned real OpenDataLoader 2.4.7 JSON, including grounded and ungrounded cases. Citation/config inputs are rejected when they drift outside the closed schemas. The public demo harness covers grounded, ungrounded, split-quote, not-found, stale-fingerprint, unsupported non-v1 claim, capability-limited, malformed-citation, malformed OpenDataLoader-style input, and summary-format reject paths. - Verification semantics are now trust-honest at alpha scope: quote containment is explicitly labeled, value/table-cell checks require normalized equality, fingerprint-pinned citations fail closed when source fingerprints are unavailable, and structured capability limits explain why a run is downgraded. @@ -53,7 +53,7 @@ Milestone A has an accepted internal Gate Zero decision for roadmap control, so | PDFium loader/runtime checks | Landed: missing/mismatched version, artifact, and runtime library hashes fail deterministically | Release packaging and operator setup path still need hardening | | Real PDF backend | Landed for simple born-digital PDFs: page count, quantized spans, worker execution, timeout, page filtering, and fingerprint path exist | Wider corpus coverage, failure fixtures, memory-limit behavior, quirk log, and Gate Zero run are still missing | | Layout groundwork | Landed: basic paragraph text blocks, fixture-backed alpha heading and flat list-item elements, simple column reading order over quantized spans, explicit alpha heading-confidence values, deterministic below-threshold confidence diagnostics, fixture metadata checks against committed extraction/layout goldens for current read-order and element-type expectations, and alpha text/Markdown export goldens derived from committed layout output | Tables, nested/richer list and heading semantics, broader rotation/quirk handling, and broader confidence dimensions remain future work | -| Layout evaluator scaffold | Landed: deterministic internal evaluator over committed extraction/layout fixture expectations, with heading/list/reading-order/rotation/hyphenation/ligature/font-identity coverage checks, expected page/span-text/font-id checks, warning-reference checks, confidence-policy checks, text/Markdown export-golden checks, expectation drift diagnostics, report JSON, Make target, and unit coverage | Broader evaluator dimensions and CI matrix integration remain future work | +| Layout evaluator scaffold | Landed: deterministic internal evaluator over committed extraction/layout fixture expectations, with heading/list/reading-order/rotation/hyphenation/ligature/font-identity/span-expectation coverage checks, expected page/span-text/font-id checks, expected-spans metadata validation, warning-reference checks, confidence-policy checks, text/Markdown export-golden checks, expectation drift diagnostics, report JSON, Make target, and unit coverage | Broader evaluator dimensions and CI matrix integration remain future work | | Python surface scaffold | Landed: internal stdlib wrapper over a caller-provided local `ethos doc parse` command, with explicit JSON/Markdown/text methods, page selection passthrough, diagnostics passthrough, timeout handling, command failure reporting, and mocked-command unit coverage | Native binding work, broader API design, and public setup path remain future work | | Font policy groundwork | Partially landed: substitution table and profile policy are present; fixture output uses deterministic substitution IDs, and committed embedded-font fixture metadata now binds expected extraction font identity | Bundled fallback asset hashing and broader font/CID validation remain open | | Schema/example validation | Landed: schemas, examples, deterministic profile, referential integrity, and bbox sanity pass the `jsonschema` validation gate | Contract changes still require explicit versioning and compatibility review | diff --git a/fixtures/README.md b/fixtures/README.md index 04cdb3a..4f86682 100644 --- a/fixtures/README.md +++ b/fixtures/README.md @@ -45,6 +45,10 @@ expectations to those committed goldens: - `expected_pages`: exact `extraction.json` page count. - `expected_span_text`: exact `extraction.json` span text order. +- `expected_font_id`: exact `font_id` required on every committed extraction span for a + focused single-font fixture. +- `expected_spans`: fixture-owned span expectation metadata with `text`, `char_start`, and + `char_end`; today this is validated for shape and consistency with `expected_span_text`. - `expected_elements`: exact `layout.json` element count. - `expected_element_types`: exact `layout.json` element type order. - `expected_text`: exact `layout.json` element text order. Use a string for a single diff --git a/fixtures/evaluate_layout_alpha.py b/fixtures/evaluate_layout_alpha.py index 26e138d..88f8ef8 100644 --- a/fixtures/evaluate_layout_alpha.py +++ b/fixtures/evaluate_layout_alpha.py @@ -70,6 +70,10 @@ "subset": "fonts", "expected_font_id": True, }, + "span_expectation_fixture": { + "subset": "ligatures", + "expected_spans": True, + }, } @@ -108,7 +112,8 @@ def main(argv: Optional[List[str]] = None) -> int: ) print( "ok layout evaluator " - "heading/list/reading-order/rotation/hyphenation/ligature/font-identity coverage present" + "heading/list/reading-order/rotation/hyphenation/ligature/" + "font-identity/span-expectation coverage present" ) print("ok layout evaluator export and warning diagnostics present") if args.out is not None: @@ -308,6 +313,7 @@ def evaluate_fixture( expected_pages = metadata.get("expected_pages") expected_span_text = metadata.get("expected_span_text") expected_font_id = metadata.get("expected_font_id") + expected_spans = metadata.get("expected_spans") expected_rotation = metadata.get("expected_rotation") expected_text_status = compare_expected_text( @@ -352,6 +358,13 @@ def evaluate_fixture( extraction, diagnostics, ) + expected_spans_status = compare_expected_spans( + fixture_id, + fixture_rel, + expected_spans, + expected_span_text, + diagnostics, + ) expected_rotation_status = compare_expected_rotation( fixture_id, fixture_rel, @@ -402,6 +415,7 @@ def evaluate_fixture( "expected_pages": expected_pages_status, "expected_span_text": expected_span_text_status, "expected_font_id": expected_font_id_status, + "expected_spans": expected_spans_status, "expected_rotation": expected_rotation_status, "warning_shape": warning_shape_status, "confidence_policy": confidence_policy_status, @@ -698,6 +712,123 @@ def compare_expected_font_id( return "pass" +def compare_expected_spans( + fixture_id: str, + fixture_rel: str, + expected_spans: Any, + expected_span_text: Any, + diagnostics: List[Dict[str, Any]], +) -> str: + if expected_spans is None: + return "not_declared" + if not isinstance(expected_spans, list): + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + "expected_spans must be an object array", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + + span_text = [] + previous_end = 0 + for span_index, span in enumerate(expected_spans): + if not isinstance(span, dict): + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + f"expected_spans[{span_index}] must be an object", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + if set(span) != {"text", "char_start", "char_end"}: + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + "expected_spans entries must contain exactly " + "text, char_start, and char_end", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + text = span.get("text") + char_start = span.get("char_start") + char_end = span.get("char_end") + if not isinstance(text, str) or not text: + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + f"expected_spans[{span_index}].text must be a non-empty string", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + if ( + not isinstance(char_start, int) + or isinstance(char_start, bool) + or char_start < 0 + ): + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + f"expected_spans[{span_index}].char_start must be an integer >= 0", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + if ( + not isinstance(char_end, int) + or isinstance(char_end, bool) + or char_end <= char_start + ): + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + f"expected_spans[{span_index}].char_end must be an integer " + "greater than char_start", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + if span_index > 0 and char_start < previous_end: + diagnostics.append( + diagnostic( + "invalid_expectation", + fixture_id, + "expected_spans char ranges must be ordered and non-overlapping", + f"{fixture_rel}/fixture.json", + ) + ) + return "invalid" + previous_end = char_end + span_text.append(text) + + if isinstance(expected_span_text, list) and all( + isinstance(item, str) for item in expected_span_text + ): + if span_text != expected_span_text: + diagnostics.append( + diagnostic( + "expected_spans_mismatch", + fixture_id, + "expected_spans text does not match expected_span_text", + f"{fixture_rel}/fixture.json", + expected=expected_span_text, + actual=span_text, + ) + ) + return "mismatch" + return "pass" + + def compare_expected_rotation( fixture_id: str, fixture_rel: str, @@ -1210,6 +1341,8 @@ def update_coverage( continue if requirement.get("expected_font_id") and check["expected_font_id"] != "pass": continue + if requirement.get("expected_spans") and check["expected_spans"] != "pass": + continue coverage[gate].append(check["fixture_id"]) diff --git a/fixtures/test_evaluate_layout_alpha.py b/fixtures/test_evaluate_layout_alpha.py index 01121aa..c0987d1 100644 --- a/fixtures/test_evaluate_layout_alpha.py +++ b/fixtures/test_evaluate_layout_alpha.py @@ -54,6 +54,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None: "list_item_fixture": ["list-case"], "multi_column_reading_order_fixture": ["column-case"], "rotation_fixture": ["rotation-case"], + "span_expectation_fixture": ["ligature-case"], }, ) heading_check = next( @@ -62,6 +63,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None: self.assertEqual(heading_check["expected_font_id"], "not_declared") self.assertEqual(heading_check["expected_pages"], "not_declared") self.assertEqual(heading_check["expected_span_text"], "not_declared") + self.assertEqual(heading_check["expected_spans"], "not_declared") self.assertEqual(heading_check["confidence_policy"], "pass") self.assertEqual(heading_check["warning_shape"], "pass") self.assertEqual( @@ -79,6 +81,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None: self.assertEqual(ligature_check["expected_pages"], "pass") self.assertEqual(ligature_check["expected_span_text"], "pass") self.assertEqual(ligature_check["expected_font_id"], "pass") + self.assertEqual(ligature_check["expected_spans"], "pass") rotation_check = next( check for check in report["checks"] if check["fixture_id"] == "rotation-case" ) @@ -188,6 +191,51 @@ def test_expected_span_text_rejects_non_string_extraction_text(self) -> None: self.assertDiagnostic(report, "invalid_extraction", "ligature-case") self.assertDiagnostic(report, "missing_coverage", None) + def test_expected_spans_text_must_match_expected_span_text(self) -> None: + self.write_required_alpha_fixture_set() + metadata_path = self.root / "synthetic/ligature-case/fixture.json" + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) + metadata["expected_spans"][1]["text"] = "wrong" + self.write_json(metadata_path, metadata) + + report = evaluate_layout_alpha(self.root) + + self.assertEqual(report["status"], "fail") + diagnostic = self.onlyDiagnostic( + report, + "expected_spans_mismatch", + "ligature-case", + ) + self.assertEqual(diagnostic["expected"], ["office", "file"]) + self.assertEqual(diagnostic["actual"], ["office", "wrong"]) + self.assertDiagnostic(report, "missing_coverage", None) + + def test_expected_spans_rejects_invalid_range(self) -> None: + self.write_required_alpha_fixture_set() + metadata_path = self.root / "synthetic/ligature-case/fixture.json" + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) + metadata["expected_spans"][1]["char_start"] = 3 + self.write_json(metadata_path, metadata) + + report = evaluate_layout_alpha(self.root) + + self.assertEqual(report["status"], "fail") + self.assertDiagnostic(report, "invalid_expectation", "ligature-case") + self.assertDiagnostic(report, "missing_coverage", None) + + def test_expected_spans_rejects_unknown_fields(self) -> None: + self.write_required_alpha_fixture_set() + metadata_path = self.root / "synthetic/ligature-case/fixture.json" + metadata = json.loads(metadata_path.read_text(encoding="utf-8")) + metadata["expected_spans"][0]["font_id"] = "embedded:EthosLigatureFixture-Regular" + self.write_json(metadata_path, metadata) + + report = evaluate_layout_alpha(self.root) + + self.assertEqual(report["status"], "fail") + self.assertDiagnostic(report, "invalid_expectation", "ligature-case") + self.assertDiagnostic(report, "missing_coverage", None) + def test_expected_font_id_drift_reports_expected_and_actual(self) -> None: self.write_required_alpha_fixture_set() extraction_path = self.root / "synthetic/ligature-case/extraction.json" @@ -527,6 +575,10 @@ def write_required_alpha_fixture_set(self) -> None: expected_pages=1, expected_span_text=["office", "file"], expected_font_id="embedded:EthosLigatureFixture-Regular", + expected_spans=[ + {"text": "office", "char_start": 0, "char_end": 6}, + {"text": "file", "char_start": 7, "char_end": 11}, + ], spans=[ { "id": "s000001", @@ -574,6 +626,7 @@ def write_fixture( expected_pages: int | None = None, expected_span_text: list[str] | None = None, expected_font_id: str | None = None, + expected_spans: list[dict] | None = None, expected_rotation: int | None = None, page_rotation: int = 0, ): @@ -592,6 +645,8 @@ def write_fixture( metadata["expected_span_text"] = expected_span_text if expected_font_id is not None: metadata["expected_font_id"] = expected_font_id + if expected_spans is not None: + metadata["expected_spans"] = expected_spans if expected_rotation is not None: metadata["expected_rotation"] = expected_rotation self.write_json(fixture_dir / "fixture.json", metadata) diff --git a/fixtures/validate_fixtures.py b/fixtures/validate_fixtures.py index f780009..9f833c5 100644 --- a/fixtures/validate_fixtures.py +++ b/fixtures/validate_fixtures.py @@ -397,6 +397,58 @@ def validate_expected_font_id(metadata, extraction, ctx: str) -> None: fail(f"{ctx} expected_font_id must match every extraction span font_id") +def validate_expected_spans(metadata, ctx: str) -> None: + if "expected_spans" not in metadata: + return + expected_spans = metadata["expected_spans"] + if not isinstance(expected_spans, list): + fail(f"{ctx} expected_spans must be an object array") + return + + span_text = [] + previous_end = 0 + for index, span in enumerate(expected_spans): + span_ctx = f"{ctx} expected_spans[{index}]" + if not isinstance(span, dict): + fail(f"{span_ctx} must be an object") + return + if set(span) != {"text", "char_start", "char_end"}: + fail(f"{span_ctx} must contain exactly ['char_end', 'char_start', 'text']") + return + text = span.get("text") + char_start = span.get("char_start") + char_end = span.get("char_end") + if not isinstance(text, str) or not text: + fail(f"{span_ctx}.text must be a non-empty string") + return + if ( + not isinstance(char_start, int) + or isinstance(char_start, bool) + or char_start < 0 + ): + fail(f"{span_ctx}.char_start must be an integer >= 0") + return + if ( + not isinstance(char_end, int) + or isinstance(char_end, bool) + or char_end <= char_start + ): + fail(f"{span_ctx}.char_end must be an integer greater than char_start") + return + if index > 0 and char_start < previous_end: + fail(f"{span_ctx}.char_start must not precede the previous span end") + return + previous_end = char_end + span_text.append(text) + + expected_span_text = metadata.get("expected_span_text") + if isinstance(expected_span_text, list) and all( + isinstance(item, str) for item in expected_span_text + ): + if span_text != expected_span_text: + fail(f"{ctx} expected_spans text must match expected_span_text") + + def validate_stage_expectations(metadata_path: Path, metadata, extraction, layout) -> None: ctx = str(metadata_path.relative_to(ROOT)) if isinstance(extraction, dict): @@ -407,6 +459,7 @@ def validate_stage_expectations(metadata_path: Path, metadata, extraction, layou ) validate_expected_span_text(metadata, extraction, ctx) validate_expected_font_id(metadata, extraction, ctx) + validate_expected_spans(metadata, ctx) if isinstance(layout, dict): validate_expected_count( layout.get("elements", []),