From 9dc2240a7f8f819f1a283365264f1287cf0d96ae Mon Sep 17 00:00:00 2001
From: docushell-admin <hello@docushell.com>
Date: Tue, 16 Jun 2026 23:25:44 +0530
Subject: [PATCH] Validate fixture span expectation metadata

Signed-off-by: docushell-admin <hello@docushell.com>
---
 docs/execution-status.md               |   4 +-
 fixtures/README.md                     |   4 +
 fixtures/evaluate_layout_alpha.py      | 135 ++++++++++++++++++++++++-
 fixtures/test_evaluate_layout_alpha.py |  55 ++++++++++
 fixtures/validate_fixtures.py          |  53 ++++++++++
 5 files changed, 248 insertions(+), 3 deletions(-)

diff --git a/docs/execution-status.md b/docs/execution-status.md
index 7cb3630..3aff4e6 100644
--- a/docs/execution-status.md
+++ b/docs/execution-status.md
@@ -16,7 +16,7 @@ The committed implementation now includes:
 - The determinism workflow includes a Windows x64 preflight lane for core c14n/profile/fingerprint contract tests, while PDFium-backed corpus work remains explicitly skipped unless the pinned runtime is configured on that runner. A static workflow test guards that matrix wiring.
 - `ethos doc parse` / `ethos fingerprint` PDF execution through a worker process with `max_parse_ms` timeout enforcement, stable error-envelope relay, diagnostics-gated worker stderr, and page-range validation/filtering.
 - Quantized page/span extraction at the backend boundary, plus a basic deterministic layout pass that assembles paragraph `text_block` elements, fixture-backed alpha heading and flat list-item elements, and simple column reading order for the current born-digital fixtures. Current alpha layout confidence is explicit for heading signals, and below-threshold layout confidence emits deterministic `low_confidence_reading_order` diagnostics instead of staying silent. Fixture validation binds selected `fixture.json` expectations to committed extraction/layout goldens and binds current alpha text/Markdown exports to committed layout output so current read-order, element-type, heading-export, list-item, and export cases fail closed on drift.
-- An internal layout evaluator scaffold exists at `fixtures/evaluate_layout_alpha.py` and `make layout-evaluator-alpha`. It reads committed `fixture.json`, `extraction.json`, `layout.json`, `text.txt`, and `markdown.md` files, summarizes alpha element-type and subset coverage, and fails closed on missing layout expectations, dangling/invalid warning references, confidence-policy drift, export-golden drift, expected page/span-text/font-id drift, expected rotation drift, or drift in fixture-backed reading order / heading / list-item / hyphenation / ligature cases.
+- An internal layout evaluator scaffold exists at `fixtures/evaluate_layout_alpha.py` and `make layout-evaluator-alpha`. It reads committed `fixture.json`, `extraction.json`, `layout.json`, `text.txt`, and `markdown.md` files, summarizes alpha element-type and subset coverage, and fails closed on missing layout expectations, dangling/invalid warning references, confidence-policy drift, export-golden drift, invalid span expectation metadata, expected page/span-text/font-id drift, expected rotation drift, or drift in fixture-backed reading order / heading / list-item / hyphenation / ligature cases.
 - Schema/example/profile validation is green through `schemas/validate_examples.py` using `jsonschema` draft 2020-12 validation, including the crop descriptor artifact contract plus referential-integrity and bbox sanity checks outside JSON Schema.
 - `ethos verify` now produces non-empty quote, value, presence, and table-cell verification checks over native Ethos document JSON and synthetic OpenDataLoader-style JSON through `--grounding opendataloader-json`; it also verifies quote/value/presence citations over pinned real OpenDataLoader 2.4.7 JSON, including grounded and ungrounded cases. Citation/config inputs are rejected when they drift outside the closed schemas. The public demo harness covers grounded, ungrounded, split-quote, not-found, stale-fingerprint, unsupported non-v1 claim, capability-limited, malformed-citation, malformed OpenDataLoader-style input, and summary-format reject paths.
 - Verification semantics are now trust-honest at alpha scope: quote containment is explicitly labeled, value/table-cell checks require normalized equality, fingerprint-pinned citations fail closed when source fingerprints are unavailable, and structured capability limits explain why a run is downgraded.
@@ -53,7 +53,7 @@ Milestone A has an accepted internal Gate Zero decision for roadmap control, so
 | PDFium loader/runtime checks | Landed: missing/mismatched version, artifact, and runtime library hashes fail deterministically | Release packaging and operator setup path still need hardening |
 | Real PDF backend | Landed for simple born-digital PDFs: page count, quantized spans, worker execution, timeout, page filtering, and fingerprint path exist | Wider corpus coverage, failure fixtures, memory-limit behavior, quirk log, and Gate Zero run are still missing |
 | Layout groundwork | Landed: basic paragraph text blocks, fixture-backed alpha heading and flat list-item elements, simple column reading order over quantized spans, explicit alpha heading-confidence values, deterministic below-threshold confidence diagnostics, fixture metadata checks against committed extraction/layout goldens for current read-order and element-type expectations, and alpha text/Markdown export goldens derived from committed layout output | Tables, nested/richer list and heading semantics, broader rotation/quirk handling, and broader confidence dimensions remain future work |
-| Layout evaluator scaffold | Landed: deterministic internal evaluator over committed extraction/layout fixture expectations, with heading/list/reading-order/rotation/hyphenation/ligature/font-identity coverage checks, expected page/span-text/font-id checks, warning-reference checks, confidence-policy checks, text/Markdown export-golden checks, expectation drift diagnostics, report JSON, Make target, and unit coverage | Broader evaluator dimensions and CI matrix integration remain future work |
+| Layout evaluator scaffold | Landed: deterministic internal evaluator over committed extraction/layout fixture expectations, with heading/list/reading-order/rotation/hyphenation/ligature/font-identity/span-expectation coverage checks, expected page/span-text/font-id checks, expected-spans metadata validation, warning-reference checks, confidence-policy checks, text/Markdown export-golden checks, expectation drift diagnostics, report JSON, Make target, and unit coverage | Broader evaluator dimensions and CI matrix integration remain future work |
 | Python surface scaffold | Landed: internal stdlib wrapper over a caller-provided local `ethos doc parse` command, with explicit JSON/Markdown/text methods, page selection passthrough, diagnostics passthrough, timeout handling, command failure reporting, and mocked-command unit coverage | Native binding work, broader API design, and public setup path remain future work |
 | Font policy groundwork | Partially landed: substitution table and profile policy are present; fixture output uses deterministic substitution IDs, and committed embedded-font fixture metadata now binds expected extraction font identity | Bundled fallback asset hashing and broader font/CID validation remain open |
 | Schema/example validation | Landed: schemas, examples, deterministic profile, referential integrity, and bbox sanity pass the `jsonschema` validation gate | Contract changes still require explicit versioning and compatibility review |
diff --git a/fixtures/README.md b/fixtures/README.md
index 04cdb3a..4f86682 100644
--- a/fixtures/README.md
+++ b/fixtures/README.md
@@ -45,6 +45,10 @@ expectations to those committed goldens:
 
 - `expected_pages`: exact `extraction.json` page count.
 - `expected_span_text`: exact `extraction.json` span text order.
+- `expected_font_id`: exact `font_id` required on every committed extraction span for a
+  focused single-font fixture.
+- `expected_spans`: fixture-owned span expectation metadata with `text`, `char_start`, and
+  `char_end`; today only its shape, range order, and match with `expected_span_text` are checked.
 - `expected_elements`: exact `layout.json` element count.
 - `expected_element_types`: exact `layout.json` element type order.
 - `expected_text`: exact `layout.json` element text order. Use a string for a single
diff --git a/fixtures/evaluate_layout_alpha.py b/fixtures/evaluate_layout_alpha.py
index 26e138d..88f8ef8 100644
--- a/fixtures/evaluate_layout_alpha.py
+++ b/fixtures/evaluate_layout_alpha.py
@@ -70,6 +70,10 @@
         "subset": "fonts",
         "expected_font_id": True,
     },
+    "span_expectation_fixture": {
+        "subset": "ligatures",
+        "expected_spans": True,
+    },
 }
 
 
@@ -108,7 +112,8 @@ def main(argv: Optional[List[str]] = None) -> int:
         )
         print(
             "ok    layout evaluator "
-            "heading/list/reading-order/rotation/hyphenation/ligature/font-identity coverage present"
+            "heading/list/reading-order/rotation/hyphenation/ligature/"
+            "font-identity/span-expectation coverage present"
         )
         print("ok    layout evaluator export and warning diagnostics present")
         if args.out is not None:
@@ -308,6 +313,7 @@ def evaluate_fixture(
     expected_pages = metadata.get("expected_pages")
     expected_span_text = metadata.get("expected_span_text")
     expected_font_id = metadata.get("expected_font_id")
+    expected_spans = metadata.get("expected_spans")
     expected_rotation = metadata.get("expected_rotation")
 
     expected_text_status = compare_expected_text(
@@ -352,6 +358,13 @@ def evaluate_fixture(
         extraction,
         diagnostics,
     )
+    expected_spans_status = compare_expected_spans(
+        fixture_id,
+        fixture_rel,
+        expected_spans,
+        expected_span_text,
+        diagnostics,
+    )
     expected_rotation_status = compare_expected_rotation(
         fixture_id,
         fixture_rel,
@@ -402,6 +415,7 @@ def evaluate_fixture(
         "expected_pages": expected_pages_status,
         "expected_span_text": expected_span_text_status,
         "expected_font_id": expected_font_id_status,
+        "expected_spans": expected_spans_status,
         "expected_rotation": expected_rotation_status,
         "warning_shape": warning_shape_status,
         "confidence_policy": confidence_policy_status,
@@ -698,6 +712,123 @@ def compare_expected_font_id(
     return "pass"
 
 
+def compare_expected_spans(  # validate fixture "expected_spans" metadata; returns a status string
+    fixture_id: str,  # attached to every diagnostic emitted here
+    fixture_rel: str,  # used to build the "<fixture_rel>/fixture.json" diagnostic path
+    expected_spans: Any,  # caller passes metadata.get("expected_spans"); None means not declared
+    expected_span_text: Any,  # sibling expectation; cross-checked only when it is a list of str
+    diagnostics: List[Dict[str, Any]],  # mutated in place: at most one entry is appended per call
+) -> str:  # one of "not_declared", "invalid", "mismatch", "pass"
+    if expected_spans is None:
+        return "not_declared"
+    if not isinstance(expected_spans, list):
+        diagnostics.append(
+            diagnostic(
+                "invalid_expectation",
+                fixture_id,
+                "expected_spans must be an object array",
+                f"{fixture_rel}/fixture.json",
+            )
+        )
+        return "invalid"
+
+    span_text = []  # text of each validated span, in declaration order
+    previous_end = 0  # char_end of the most recently accepted span
+    for span_index, span in enumerate(expected_spans):
+        if not isinstance(span, dict):
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    f"expected_spans[{span_index}] must be an object",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"  # fail fast: only the first invalid entry is reported
+        if set(span) != {"text", "char_start", "char_end"}:  # exact key set: no missing, no extra
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    "expected_spans entries must contain exactly "
+                    "text, char_start, and char_end",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"
+        text = span.get("text")
+        char_start = span.get("char_start")
+        char_end = span.get("char_end")
+        if not isinstance(text, str) or not text:  # rejects non-strings and the empty string
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    f"expected_spans[{span_index}].text must be a non-empty string",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"
+        if (
+            not isinstance(char_start, int)
+            or isinstance(char_start, bool)  # bool is an int subclass; True/False are not offsets
+            or char_start < 0
+        ):
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    f"expected_spans[{span_index}].char_start must be an integer >= 0",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"
+        if (
+            not isinstance(char_end, int)
+            or isinstance(char_end, bool)  # same bool exclusion as for char_start
+            or char_end <= char_start  # empty and reversed ranges are rejected
+        ):
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    f"expected_spans[{span_index}].char_end must be an integer "
+                    "greater than char_start",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"
+        if span_index > 0 and char_start < previous_end:  # gaps between spans are allowed; overlap is not
+            diagnostics.append(
+                diagnostic(
+                    "invalid_expectation",
+                    fixture_id,
+                    "expected_spans char ranges must be ordered and non-overlapping",
+                    f"{fixture_rel}/fixture.json",
+                )
+            )
+            return "invalid"
+        previous_end = char_end
+        span_text.append(text)
+
+    if isinstance(expected_span_text, list) and all(  # skip the cross-check for absent/malformed expected_span_text
+        isinstance(item, str) for item in expected_span_text
+    ):
+        if span_text != expected_span_text:  # order-sensitive list comparison
+            diagnostics.append(
+                diagnostic(
+                    "expected_spans_mismatch",
+                    fixture_id,
+                    "expected_spans text does not match expected_span_text",
+                    f"{fixture_rel}/fixture.json",
+                    expected=expected_span_text,
+                    actual=span_text,
+                )
+            )
+            return "mismatch"
+    return "pass"  # also reached for an empty list when no usable expected_span_text is declared
+
+
 def compare_expected_rotation(
     fixture_id: str,
     fixture_rel: str,
@@ -1210,6 +1341,8 @@ def update_coverage(
             continue
         if requirement.get("expected_font_id") and check["expected_font_id"] != "pass":
             continue
+        if requirement.get("expected_spans") and check["expected_spans"] != "pass":
+            continue
         coverage[gate].append(check["fixture_id"])
 
 
diff --git a/fixtures/test_evaluate_layout_alpha.py b/fixtures/test_evaluate_layout_alpha.py
index 01121aa..c0987d1 100644
--- a/fixtures/test_evaluate_layout_alpha.py
+++ b/fixtures/test_evaluate_layout_alpha.py
@@ -54,6 +54,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None:
                 "list_item_fixture": ["list-case"],
                 "multi_column_reading_order_fixture": ["column-case"],
                 "rotation_fixture": ["rotation-case"],
+                "span_expectation_fixture": ["ligature-case"],
             },
         )
         heading_check = next(
@@ -62,6 +63,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None:
         self.assertEqual(heading_check["expected_font_id"], "not_declared")
         self.assertEqual(heading_check["expected_pages"], "not_declared")
         self.assertEqual(heading_check["expected_span_text"], "not_declared")
+        self.assertEqual(heading_check["expected_spans"], "not_declared")
         self.assertEqual(heading_check["confidence_policy"], "pass")
         self.assertEqual(heading_check["warning_shape"], "pass")
         self.assertEqual(
@@ -79,6 +81,7 @@ def test_passing_fixture_set_reports_counts_and_coverage(self) -> None:
         self.assertEqual(ligature_check["expected_pages"], "pass")
         self.assertEqual(ligature_check["expected_span_text"], "pass")
         self.assertEqual(ligature_check["expected_font_id"], "pass")
+        self.assertEqual(ligature_check["expected_spans"], "pass")
         rotation_check = next(
             check for check in report["checks"] if check["fixture_id"] == "rotation-case"
         )
@@ -188,6 +191,51 @@ def test_expected_span_text_rejects_non_string_extraction_text(self) -> None:
         self.assertDiagnostic(report, "invalid_extraction", "ligature-case")
         self.assertDiagnostic(report, "missing_coverage", None)
 
+    def test_expected_spans_text_must_match_expected_span_text(self) -> None:  # span text drift must fail the report
+        self.write_required_alpha_fixture_set()
+        metadata_path = self.root / "synthetic/ligature-case/fixture.json"
+        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
+        metadata["expected_spans"][1]["text"] = "wrong"  # expected_span_text stays ["office", "file"]
+        self.write_json(metadata_path, metadata)
+
+        report = evaluate_layout_alpha(self.root)
+
+        self.assertEqual(report["status"], "fail")
+        diagnostic = self.onlyDiagnostic(
+            report,
+            "expected_spans_mismatch",
+            "ligature-case",
+        )
+        self.assertEqual(diagnostic["expected"], ["office", "file"])  # "expected" carries expected_span_text
+        self.assertEqual(diagnostic["actual"], ["office", "wrong"])  # "actual" carries the expected_spans texts
+        self.assertDiagnostic(report, "missing_coverage", None)  # presumably the fixture no longer counts toward span coverage
+
+    def test_expected_spans_rejects_invalid_range(self) -> None:  # overlapping char ranges must be rejected
+        self.write_required_alpha_fixture_set()
+        metadata_path = self.root / "synthetic/ligature-case/fixture.json"
+        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
+        metadata["expected_spans"][1]["char_start"] = 3  # 3 < first span's char_end (6): ranges now overlap
+        self.write_json(metadata_path, metadata)
+
+        report = evaluate_layout_alpha(self.root)
+
+        self.assertEqual(report["status"], "fail")
+        self.assertDiagnostic(report, "invalid_expectation", "ligature-case")
+        self.assertDiagnostic(report, "missing_coverage", None)  # presumably the fixture no longer counts toward span coverage
+
+    def test_expected_spans_rejects_unknown_fields(self) -> None:  # extra keys on a span entry must be rejected
+        self.write_required_alpha_fixture_set()
+        metadata_path = self.root / "synthetic/ligature-case/fixture.json"
+        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
+        metadata["expected_spans"][0]["font_id"] = "embedded:EthosLigatureFixture-Regular"  # key outside text/char_start/char_end
+        self.write_json(metadata_path, metadata)
+
+        report = evaluate_layout_alpha(self.root)
+
+        self.assertEqual(report["status"], "fail")
+        self.assertDiagnostic(report, "invalid_expectation", "ligature-case")
+        self.assertDiagnostic(report, "missing_coverage", None)  # presumably the fixture no longer counts toward span coverage
+
     def test_expected_font_id_drift_reports_expected_and_actual(self) -> None:
         self.write_required_alpha_fixture_set()
         extraction_path = self.root / "synthetic/ligature-case/extraction.json"
@@ -527,6 +575,10 @@ def write_required_alpha_fixture_set(self) -> None:
                 expected_pages=1,
                 expected_span_text=["office", "file"],
                 expected_font_id="embedded:EthosLigatureFixture-Regular",
+                expected_spans=[
+                    {"text": "office", "char_start": 0, "char_end": 6},
+                    {"text": "file", "char_start": 7, "char_end": 11},
+                ],
                 spans=[
                     {
                         "id": "s000001",
@@ -574,6 +626,7 @@ def write_fixture(
         expected_pages: int | None = None,
         expected_span_text: list[str] | None = None,
         expected_font_id: str | None = None,
+        expected_spans: list[dict] | None = None,
         expected_rotation: int | None = None,
         page_rotation: int = 0,
     ):
@@ -592,6 +645,8 @@ def write_fixture(
             metadata["expected_span_text"] = expected_span_text
         if expected_font_id is not None:
             metadata["expected_font_id"] = expected_font_id
+        if expected_spans is not None:
+            metadata["expected_spans"] = expected_spans
         if expected_rotation is not None:
             metadata["expected_rotation"] = expected_rotation
         self.write_json(fixture_dir / "fixture.json", metadata)
diff --git a/fixtures/validate_fixtures.py b/fixtures/validate_fixtures.py
index f780009..9f833c5 100644
--- a/fixtures/validate_fixtures.py
+++ b/fixtures/validate_fixtures.py
@@ -397,6 +397,58 @@ def validate_expected_font_id(metadata, extraction, ctx: str) -> None:
         fail(f"{ctx} expected_font_id must match every extraction span font_id")
 
 
+def validate_expected_spans(metadata, ctx: str) -> None:  # check "expected_spans" shape, ranges, and text agreement
+    if "expected_spans" not in metadata:  # the expectation is optional
+        return
+    expected_spans = metadata["expected_spans"]
+    if not isinstance(expected_spans, list):
+        fail(f"{ctx} expected_spans must be an object array")
+        return  # NOTE(review): explicit return suggests fail() records the error without raising — confirm
+
+    span_text = []  # text of each validated span, in declaration order
+    previous_end = 0  # char_end of the most recently accepted span
+    for index, span in enumerate(expected_spans):
+        span_ctx = f"{ctx} expected_spans[{index}]"  # message prefix identifying the offending entry
+        if not isinstance(span, dict):
+            fail(f"{span_ctx} must be an object")
+            return  # only the first invalid entry is reported
+        if set(span) != {"text", "char_start", "char_end"}:  # exact key set: no missing, no extra
+            fail(f"{span_ctx} must contain exactly ['char_end', 'char_start', 'text']")
+            return
+        text = span.get("text")
+        char_start = span.get("char_start")
+        char_end = span.get("char_end")
+        if not isinstance(text, str) or not text:  # rejects non-strings and the empty string
+            fail(f"{span_ctx}.text must be a non-empty string")
+            return
+        if (
+            not isinstance(char_start, int)
+            or isinstance(char_start, bool)  # bool is an int subclass; True/False are not offsets
+            or char_start < 0
+        ):
+            fail(f"{span_ctx}.char_start must be an integer >= 0")
+            return
+        if (
+            not isinstance(char_end, int)
+            or isinstance(char_end, bool)  # same bool exclusion as for char_start
+            or char_end <= char_start  # empty and reversed ranges are rejected
+        ):
+            fail(f"{span_ctx}.char_end must be an integer greater than char_start")
+            return
+        if index > 0 and char_start < previous_end:  # gaps between spans are allowed; overlap is not
+            fail(f"{span_ctx}.char_start must not precede the previous span end")
+            return
+        previous_end = char_end
+        span_text.append(text)
+
+    expected_span_text = metadata.get("expected_span_text")
+    if isinstance(expected_span_text, list) and all(  # cross-check only a well-formed list of strings
+        isinstance(item, str) for item in expected_span_text
+    ):
+        if span_text != expected_span_text:  # order-sensitive list comparison
+            fail(f"{ctx} expected_spans text must match expected_span_text")
+
+
 def validate_stage_expectations(metadata_path: Path, metadata, extraction, layout) -> None:
     ctx = str(metadata_path.relative_to(ROOT))
     if isinstance(extraction, dict):
@@ -407,6 +459,7 @@ def validate_stage_expectations(metadata_path: Path, metadata, extraction, layou
         )
         validate_expected_span_text(metadata, extraction, ctx)
         validate_expected_font_id(metadata, extraction, ctx)
+        validate_expected_spans(metadata, ctx)
     if isinstance(layout, dict):
         validate_expected_count(
             layout.get("elements", []),