From 3f2d2628878484f6cb3cc04449dba39fbf0a6a1d Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:41:54 -0700 Subject: [PATCH 01/13] docs(specs): comparator-aware evaluators design Extend H6/H7/H8 (interaction) and H19/H20/H21 (cell-renderer) evaluators to include comparator evidence in their evidence arrays, mirroring H1's pattern. Status logic stays pretable-only; data is informational. Retires the aggregator-script pattern over time. Co-Authored-By: Claude Opus 4.7 --- ...5-12-comparator-aware-evaluators-design.md | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md diff --git a/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md b/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md new file mode 100644 index 0000000..5cab6e7 --- /dev/null +++ b/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md @@ -0,0 +1,111 @@ +# Comparator-Aware Evaluators Design + +**Date:** 2026-05-12 +**Status:** Draft (awaiting user review before plan) +**Predecessors:** [PR #130 cell-renderer comparators](../../research/repo-memory.md), [PR #131 sort+filter comparators](../../research/repo-memory.md), [PR #132 homepage interaction wedge refresh](../../research/repo-memory.md). + +--- + +## Goal + +Extend the six pretable-only evaluators (H6, H7, H8 interaction + H19, H20, H21 cell-renderer) in `scripts/bench-matrix.mjs` to include comparator-adapter evidence in their `evidence` arrays. Same pattern as `evaluateH1`, which already surfaces competitor series alongside pretable. Result: `hypotheses.json` becomes a single source of truth for cross-adapter perf data, retiring (eventually) the per-PR aggregator scripts currently feeding the `/bench` page. 
+ +## Why + +After PRs #130–#132 opened the supportedScripts gate on cell-renderer and sort/filter scripts for all four adapters, the matrix captures comparator runs but the H6/H7/H8/H19/H20/H21 evaluators only summarize pretable's row in their `evidence` arrays. Consumers needing cross-adapter data (the `/bench` page, the homepage's `ComparisonTable`, future tools) have to read per-run summary files via aggregator scripts — that pattern was acceptable for one PR but has now repeated three times (PR #130 cell-renderer, PR #131 sort+filter, PR #132 interaction homepage refresh). + +H1 already implements the right shape: its `evidence` array contains pretable's summary plus the best-full-grid comparator and the best virtualization-primitive comparator. Reuse that shape on the six other evaluators. + +## Non-goals + +- **No threshold or status-logic changes.** Pretable's absolute thresholds still drive verdicts; comparator data is informational. This avoids unintentional status flips. +- **No `/bench` page update.** The page still reads its aggregator JSONs from PRs #130/#131/#132. A separate follow-up can swap it to read from `hypotheses.json` once the new evidence shape is proven. Out of scope. +- **No new hypotheses.** Six existing evaluators get richer; no H22+ added. +- **No homepage update.** Same reasoning as the `/bench` page. +- **No cross-browser data.** Chromium-only, mirroring all prior B2 work. +- **No re-thresholding of H6/H7/H8** despite PR #134's finding that pretable's filter-text + filter-metadata land over the 16 ms single-frame budget. That re-thresholding is editorial / scoping work — separate from this architectural change. The evaluator thresholds stay as-is; this PR just adds data alongside. + +## Architecture + +### Evaluator extension pattern + +Each of the six target evaluators currently: + +1. Finds pretable's series for the relevant slice. +2. Computes pretable's metrics (`summarizeRunSeriesEvidence`). +3. 
Applies absolute thresholds → status. +4. Returns `{ id, status, summary, evidence: [pretableEvidence] }`. + +The extension: + +1. Same steps 1–3. +2. Find each comparator's series for the same (scenarioId, scriptName) slice. +3. Summarize each comparator that has a completed series. +4. Append each comparator's evidence to the array: `evidence: [pretableEvidence, ...comparatorEvidences]`. +5. Status unchanged — still pretable-only thresholds. + +The comparator lookup mirrors `evaluateH1`'s `groupRunSeries(runs, { scenarioId, scriptName }).filter(...)` pattern — same helper functions, no new utilities needed. + +### Per-evaluator slice definitions + +| Evaluator | Scenario | Script | Comparators | +| --- | --- | --- | --- | +| `evaluateH6` | S2 | `sort` | ag-grid, tanstack, mui | +| `evaluateH7` | S2 | `filter-metadata` | ag-grid, tanstack, mui | +| `evaluateH8` | S2 | `filter-text` | ag-grid, tanstack, mui | +| `evaluateH19` | S2 | `scroll-with-format` (compared to `scroll` baseline) | ag-grid, tanstack, mui — but only on `scroll-with-format` slice; baseline stays pretable | +| `evaluateH20` | S2 | `scroll-with-render` | ag-grid, tanstack, mui | +| `evaluateH21` | S2 | `scroll-with-heavy-render` | ag-grid, tanstack, mui | + +H19 is the tricky one: its current verdict compares format-overhead (`scroll-with-format`) against a `scroll` baseline, both pretable. The comparator data adds value on the `scroll-with-format` slice (comparator's format overhead vs its own scroll baseline would be a deeper extension; out of scope). For H19 we surface comparator `scroll-with-format` evidence alongside pretable's existing format + baseline; comparators' format-vs-baseline overhead is informational, not gated. + +### Test updates + +`scripts/__tests__/bench-matrix.test.mjs` has existing tests for each evaluator. Two new tests per evaluator: + +1. Positive: with comparator runs in the input, the evidence array contains comparator entries. +2. 
Regression: with no comparator runs, the existing pretable-only behavior is unchanged. + +Existing positive/negative tests (status verdicts) stay unchanged since status logic is unmodified. + +### Matrix re-run + +One matrix invocation to produce a fresh milestone with all six evaluators populated: + +``` +pnpm bench:matrix \ + --project=chromium \ + --adapters=pretable,ag-grid,tanstack,mui \ + --scenarios=S2 \ + --scripts=scroll,sort,filter-metadata,filter-text,scroll-with-format,scroll-with-render,scroll-with-heavy-render \ + --scale=hypothesis \ + --repeats=3 +``` + +7 scripts × 4 adapters × 3 repeats = 84 runs. Wall-clock ~5 min based on PR #131 / PR #132 precedent. + +Milestone path: `status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json`. The original B2 / autosize / sort-filter / cell-renderer milestones stay intact. + +### Sanity check on existing verdicts + +After the matrix runs, compare the new hypotheses.json statuses to the existing milestone status entries (pre-existing from PRs #127/#130/#131). All six should retain their current `satisfied` status — the evaluator logic for status didn't change, only the evidence shape. If anything flips, that's either a runtime fluke (re-run once) or an evaluator bug surfaced by the refactor (STOP and investigate). + +## Out of scope follow-ups + +- **`/bench` page swap to read from hypotheses.json.** Smaller editorial PR after this lands. +- **Retire aggregator scripts** (`scripts/extract-interaction-summary.mjs` and the inline aggregators baked into earlier milestones). Once the page reads from hypotheses.json, the aggregators can go. +- **H19 comparator-format-overhead semantics.** Currently H19 = pretable's format overhead vs pretable's scroll baseline. The comparator version would be each comparator's format-overhead vs its own scroll baseline (a per-adapter delta). Surface comparator format p95 alongside but don't compute their deltas — that's a future enhancement. 
+- **The 4 editorial recommendations from PR #134** (homepage prose updates, ComparisonTable budget column, TanStack trail-marker, pretable filter perf-fix investigation). Pending user editorial review on #134. + +## Risks + +- **Evaluator output shape change might break downstream consumers.** Mitigation: the `/bench` page reads aggregator JSONs, not `hypotheses.json` directly. The matrix-runner tests cover shape; if anything reads `hypotheses.json` evidence shape, that surfaces in `pnpm -w test`. +- **Comparator series lookup edge cases.** If a comparator has `status: "unsupported"` for the slice (which doesn't happen for the six target scripts anymore since the gate was opened), the existing helpers handle it. Verified by reading `evaluateH1`'s pattern. +- **H19 format-overhead semantics drift.** If a future reader assumes the H19 evidence array represents format-overhead deltas for every entry, they'd be wrong — only pretable's entry is the delta; comparator entries are absolute format p95. Document this in the evaluator's docblock to head off confusion. + +## Test plan + +- Unit: each of six evaluators gains a "with comparator runs, evidence array includes comparator entries" test. +- Unit: each of six evaluators retains its existing status-logic tests (no changes to those). +- Integration: `pnpm -w test` passes (existing matrix-runner test suite covers report shape). +- Manual: matrix re-run produces a fresh milestone JSON; spot-check that the H6/H7/H8/H20/H21 evidence arrays each contain 4 entries (pretable + 3 comparators) and that the H19 array contains 5 (pretable format + pretable scroll baseline + 3 comparators). From 3202ef7cd277fcc93011d9d24b860cea488f6c67 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:42:50 -0700 Subject: [PATCH 02/13] docs(plans): comparator-aware evaluators implementation plan Twelve-task plan: shared helper for comparator-evidence lookup, six evaluator extensions (H6, H7, H8, H19, H20, H21), test coverage, matrix re-run, repo-memory entry, PR. 
Co-Authored-By: Claude Opus 4.7 --- .../2026-05-12-comparator-aware-evaluators.md | 300 ++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md diff --git a/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md b/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md new file mode 100644 index 0000000..1712911 --- /dev/null +++ b/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md @@ -0,0 +1,300 @@ +# Comparator-Aware Evaluators Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Extend six evaluators (H6, H7, H8, H19, H20, H21) in `scripts/bench-matrix.mjs` to include comparator evidence in their `evidence` arrays. Mirrors `evaluateH1`'s pattern. Status logic unchanged. + +**Architecture:** Per the spec at `docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md`. Single PR. Auto-merge if all six hypothesis statuses stay at their existing values; hold for review if any status flips. + +**Tech Stack:** TypeScript, Node test runner, Playwright (Chromium). No new dependencies. + +**Spec:** [`docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md`](../specs/2026-05-12-comparator-aware-evaluators-design.md) + +**Working directory:** `/Users/blove/repos/pretable/.worktrees/comparator-aware-evaluators`. 
+ +--- + +## File Structure + +``` +scripts/ +├── bench-matrix.mjs (MODIFY: extend evaluateH6, H7, H8, H19, H20, H21) +└── __tests__/bench-matrix.test.mjs (MODIFY: add comparator-evidence test per evaluator) + +status/milestones/ +└── 2026-05-12-comparator-aware-evaluators.hypotheses.json (NEW: matrix re-run output) + +docs/research/ +└── repo-memory.md (MODIFY: 2026-05-12 entry — evaluator architecture) +``` + +--- + +## Task 1 — Read H1 as the reference shape + +- [ ] **1.1** Open `scripts/bench-matrix.mjs` and locate `evaluateH1`. Note how it: + - Uses `findRunSeries` to find pretable's series. + - Uses `groupRunSeries(runs, { scenarioId, scriptName }).filter(s => s[0]?.adapterId !== "pretable")` to find competitor series. + - Uses `summarizeRunSeriesEvidence(series)` to produce each evidence entry. + - Picks a "best full-grid competitor" and a "best virtualization-primitive competitor" via `medianMetric` comparisons. + - Returns `evidence: [pretableEvidence, bestFullGridEvidence, ...(bestPrimitiveEvidence ? [bestPrimitiveEvidence] : [])]`. + + This is the model. The six target evaluators will surface ALL comparator entries (not just the best per family) since the comparison story is different — for H6/H7/H8/H19/H20/H21 we want every comparator's number visible. + +- [ ] **1.2** Read each of the six target evaluators (`evaluateH6`, `evaluateH7`, `evaluateH8`, `evaluateH19`, `evaluateH20`, `evaluateH21`) to understand the existing shape before editing. + +## Task 2 — Helper for comparator-evidence lookup + +- [ ] **2.1** Add a helper function near the top of the evaluator section of `scripts/bench-matrix.mjs` (above `evaluateH1`). Single helper, used by all six evaluators: + + ```js + /** + * Find comparator-adapter series for the given (scenarioId, scriptName) + * slice and return their evidence summaries. Pretable is excluded — + * callers are expected to construct pretable evidence separately. 
Each + * returned entry is the same shape as summarizeRunSeriesEvidence's output, + * matching the evidence-array contract used by all evaluators. + * + * Used by H6/H7/H8 (interaction) and H19/H20/H21 (cell-renderer) to + * surface comparator metrics alongside pretable in their evidence arrays. + * Status verdicts remain pretable-only; this data is informational. + */ + function findComparatorEvidence(runs, { scenarioId, scriptName }) { + const series = groupRunSeries(runs, { scenarioId, scriptName }).filter( + (s) => s[0]?.adapterId && s[0].adapterId !== "pretable", + ); + return series.map((s) => summarizeRunSeriesEvidence(s)); + } + ``` + +- [ ] **2.2** Typecheck: + ``` + pnpm --filter "@pretable-internal/bench-runner" typecheck + ``` + Expected: passes. (bench-matrix.mjs is JS, not TS, so typecheck just covers the package; the .mjs is linted separately.) + +## Task 3 — Extend H6 (sort) + +- [ ] **3.1** In `evaluateH6` (around line 613), find the `return { id: "H6", ..., evidence: [...] }` shape. + +- [ ] **3.2** Just before the return, compute comparator evidence: + ```js + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId, + scriptName: "sort", + }); + ``` + + Update each return statement in `evaluateH6` to spread `...comparatorEvidence` after the pretable evidence in the array. There may be multiple returns (insufficient / failing / satisfied branches); update them all. + + Skeleton (illustrative, adapt to actual evaluator code): + ```js + return { + id: "H6", + status: ..., + summary: ..., + evidence: [pretableEvidence, ...comparatorEvidence], + }; + ``` + + Note: for `insufficient` returns where `pretableEvidence` isn't built (no pretable series), keep the existing `evidence: []` — comparator data alone doesn't satisfy any version of H6. + +- [ ] **3.3** Run the bench-matrix tests: + ``` + node --test scripts/__tests__/bench-matrix.test.mjs + ``` + Expected: all existing tests still pass. 
The evidence array now has more entries but existing tests check status + summary, not evidence-length. + +- [ ] **3.4** Commit: + ``` + git add scripts/bench-matrix.mjs + git commit -m "feat(bench-matrix): H6 evaluator surfaces comparator evidence" + ``` + +## Task 4 — Extend H7 (filter-metadata) + +- [ ] **4.1** In `evaluateH7`, repeat the Task 3 pattern with `scriptName: "filter-metadata"`. + +- [ ] **4.2** Run tests, commit: + ``` + node --test scripts/__tests__/bench-matrix.test.mjs + git add scripts/bench-matrix.mjs + git commit -m "feat(bench-matrix): H7 evaluator surfaces comparator evidence" + ``` + +## Task 5 — Extend H8 (filter-text) + +- [ ] **5.1** Same pattern with `scriptName: "filter-text"`. + +- [ ] **5.2** Run tests, commit `feat(bench-matrix): H8 evaluator surfaces comparator evidence`. + +## Task 6 — Extend H19 (format overhead) + +- [ ] **6.1** H19 is structured around comparing pretable's `scroll-with-format` p95 against pretable's `scroll` baseline. The comparator extension surfaces each comparator's `scroll-with-format` evidence (not deltas — see spec's non-goals; per-adapter format-vs-baseline deltas are a future enhancement). + + In `evaluateH19`, add the comparator lookup: + ```js + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId: "S2", + scriptName: "scroll-with-format", + }); + ``` + + Append `...comparatorEvidence` to each return's `evidence` array. Keep the existing pretable format + pretable scroll baseline entries at the front. + + Add a comment near the evidence array clarifying the semantics: + ```js + // evidence shape: [pretable format-overhead summary, pretable scroll + // baseline summary, ...comparator scroll-with-format absolute summaries]. + // Pretable's first two entries form the format-overhead delta the H19 + // status verdict consumes; comparator entries are absolute format p95 + // for cross-adapter reference, NOT deltas vs their own scroll baselines. 
+ ``` + +- [ ] **6.2** Run tests, commit `feat(bench-matrix): H19 evaluator surfaces comparator format evidence`. + +## Task 7 — Extend H20 (cheap-render scroll) + +- [ ] **7.1** Same pattern with `scriptName: "scroll-with-render"`. + +- [ ] **7.2** Run tests, commit `feat(bench-matrix): H20 evaluator surfaces comparator evidence`. + +## Task 8 — Extend H21 (heavy-render scroll) + +- [ ] **8.1** Same pattern with `scriptName: "scroll-with-heavy-render"`. + +- [ ] **8.2** Run tests, commit `feat(bench-matrix): H21 evaluator surfaces comparator evidence`. + +## Task 9 — Add test coverage + +- [ ] **9.1** For each of the six evaluators, add ONE new test asserting the comparator-evidence behavior. Mirror an existing test (e.g., `evaluateH6 satisfied when pretable sort latency is under threshold`) but include comparator runs in the input, and assert: + + ```js + test("evaluateH6 evidence array includes comparator entries when comparator runs are present", () => { + const runs = [ + createInteractionRun({ adapterId: "pretable", scenarioId: "S2", scriptName: "sort", ... }), + createInteractionRun({ adapterId: "ag-grid", scenarioId: "S2", scriptName: "sort", ... }), + createInteractionRun({ adapterId: "tanstack", scenarioId: "S2", scriptName: "sort", ... }), + createInteractionRun({ adapterId: "mui", scenarioId: "S2", scriptName: "sort", ... }), + ]; + const result = evaluateH6(runs); + assert.equal(result.evidence.length, 4); // pretable + 3 comparators + assert.deepEqual(result.evidence.map((e) => e.adapterId).sort(), [ + "ag-grid", "mui", "pretable", "tanstack", + ]); + }); + ``` + + Adapt the test helper invocation to whatever `createInteractionRun` / `createScrollRun` factory the test file already has. Read the file for the existing helper before writing the test. The suite runs under `node --test`, so use `node:assert` assertions rather than Jest-style `expect`. For H19, expect 5 entries rather than 4, because its evidence array also carries pretable's `scroll` baseline. + +- [ ] **9.2** Run all matrix-runner tests: + ``` + node --test scripts/__tests__/bench-matrix.test.mjs + ``` + Expected: 6 new tests pass; all existing tests still pass. 
+ +- [ ] **9.3** Commit: + ``` + git add scripts/__tests__/bench-matrix.test.mjs + git commit -m "test(bench-matrix): comparator-evidence assertions for H6/H7/H8/H19/H20/H21" + ``` + +## Task 10 — Matrix re-run + +- [ ] **10.1** Build the harness: + ``` + pnpm --filter @pretable/app-bench build + ``` + +- [ ] **10.2** Run the matrix: + ``` + pnpm bench:matrix \ + --project=chromium \ + --adapters=pretable,ag-grid,tanstack,mui \ + --scenarios=S2 \ + --scripts=scroll,sort,filter-metadata,filter-text,scroll-with-format,scroll-with-render,scroll-with-heavy-render \ + --scale=hypothesis \ + --repeats=3 + ``` + + Use `Bash run_in_background: true` since this is ~5 min wall-clock. 7 scripts × 4 adapters × 3 repeats = 84 runs. + +- [ ] **10.3** Wait for the matrix to complete (poll sparingly via `pgrep -f bench-matrix`). When done, locate the runset: + ``` + ls -lt status/runsets/ | head -3 + ``` + +- [ ] **10.4** Read `status/runsets/<runset-id>/hypotheses.json` and verify: + - H1 status: matches existing milestone (satisfied at parity). + - H6/H7/H8 status: each `satisfied` (pretable absolute thresholds unchanged). + - H19/H20/H21 status: each `satisfied` (cell-renderer absolute thresholds unchanged). + - Each of H6/H7/H8/H20/H21 has 4 evidence entries (pretable + 3 comparators); H19 has 5 (pretable format + pretable scroll baseline + 3 comparators). + - If any status flips unexpectedly, STOP and report DONE_WITH_CONCERNS — don't change thresholds. + +- [ ] **10.5** Copy the runset to the milestone path: + ``` + cp status/runsets/<runset-id>/hypotheses.json status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json + ``` + +- [ ] **10.6** Commit: + ``` + git add status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json + git commit -m "chore(bench): matrix milestone for comparator-aware evaluators + + H6/H7/H8/H19/H20/H21 now embed comparator evidence in their evidence + arrays. Pretable-only status verdicts unchanged." 
+ ``` + +## Task 11 — Repo-memory entry + +- [ ] **11.1** Append a 2026-05-12 entry to `docs/research/repo-memory.md`: + - Architecture summary: evaluators now embed comparator evidence; H1's pattern reused. + - Status logic unchanged (pretable thresholds drive verdicts). + - Test coverage extended for all six evaluators. + - Matrix re-run committed at the new milestone path. + - Note that the aggregator scripts from PRs #130/#131/#132 are now redundant for `hypotheses.json` consumers, but still feed the `/bench` page; a future PR can swap the page to read from `hypotheses.json` directly and retire the aggregators. + +- [ ] **11.2** Commit: + ``` + git add docs/research/repo-memory.md + git commit -m "docs(research): repo-memory entry — comparator-aware evaluators" + ``` + +## Task 12 — Gates + PR + +- [ ] **12.1** Repo-wide gates: + ``` + pnpm -w typecheck && pnpm -w test && pnpm -w lint && pnpm format + ``` + Expected: all pass. The evaluator changes are JS in `scripts/`; typecheck doesn't cover them but lint does. + +- [ ] **12.2** Push + open PR: + ``` + git push -u origin comparator-aware-evaluators + gh pr create --title "feat(bench-matrix): H6-H8 + H19-H21 evaluators embed comparator evidence" --body "..." + ``` + + PR body covers: summary, the evaluator shape change, the matrix re-run, what's NOT in this PR (no /bench page changes, no threshold changes, no aggregator-script retirement). + +- [ ] **12.3** Auto-merge decision per the spec: + - If all six hypotheses retained their existing `satisfied` status → `gh pr merge --auto --squash`. + - If anything flipped → HOLD for user review (surface in the PR body and end-of-task report). 
+ +--- + +## Self-review + +| Spec section | Plan task | +| --- | --- | +| Evaluator extension pattern | Tasks 2 (helper) + 3–8 (per-evaluator) | +| Per-evaluator slice definitions | Tasks 3–8 use the right (scenarioId, scriptName) tuple | +| Test updates | Task 9 | +| Matrix re-run | Task 10 | +| Sanity check on verdicts | Task 10.4 | +| H19 format-overhead semantics drift | Task 6.1 inline comment | + +All sections covered. + +No placeholders outside the PR-body template (those are intentional). Type/value consistency: `findComparatorEvidence` signature is consistent across all six callers; helper returns the same shape as `summarizeRunSeriesEvidence`. + +Scope: single PR, 12 tasks, ~10 commits-of-record. Auto-mergeable unless verdicts flip. From 17dda80a8c21570a466c1241fb72b1cc08497294 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:44:58 -0700 Subject: [PATCH 03/13] feat(bench-matrix): H6/H7/H8 evaluators surface all comparator evidence evaluateInteractionHypothesis now embeds every measured comparator adapter in the evidence array (was: best-by-interaction-latency only). Pretable-only status verdicts unchanged. Adds findComparatorEvidence helper used by all six target evaluators (H6/H7/H8 + H19/H20/H21). Co-Authored-By: Claude Opus 4.7 --- scripts/bench-matrix.mjs | 48 ++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/scripts/bench-matrix.mjs b/scripts/bench-matrix.mjs index 3075b45..05a2af4 100644 --- a/scripts/bench-matrix.mjs +++ b/scripts/bench-matrix.mjs @@ -398,6 +398,24 @@ async function writeHypothesisReport(report) { return path.relative(process.cwd(), reportPath); } +/** + * Find comparator-adapter series for the given (scenarioId, scriptName) + * slice and return their evidence summaries. Pretable is excluded — + * callers are expected to construct pretable evidence separately. 
Each + * returned entry is the same shape as summarizeRunSeriesEvidence's output, + * matching the evidence-array contract used by all evaluators. + * + * Used by H6/H7/H8 (interaction) and H19/H20/H21 (cell-renderer) to + * surface comparator metrics alongside pretable in their evidence arrays. + * Status verdicts remain pretable-only; this data is informational. + */ +function findComparatorEvidence(runs, { scenarioId, scriptName }) { + const series = groupRunSeries(runs, { scenarioId, scriptName }).filter( + (s) => s[0]?.adapterId && s[0].adapterId !== "pretable", + ); + return series.map((s) => summarizeRunSeriesEvidence(s)); +} + function evaluateH1(runs, scenarioId) { const wrappedScrollSeries = findRunSeries(runs, { adapterId: "pretable", @@ -1516,6 +1534,13 @@ function evaluateInteractionHypothesis( } const candidateEvidence = summarizeRunSeriesEvidence(candidateSeries); + // Surface ALL comparator entries (not just the best one) so the evidence + // array carries every measured adapter for cross-reference. H6/H7/H8 + // status verdicts remain pretable-only — comparator data is informational. + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId, + scriptName, + }); const latency = candidateEvidence.metricSummary?.interaction_latency_ms; const settle = candidateEvidence.metricSummary?.settle_duration_ms; const blankGap = @@ -1546,26 +1571,10 @@ function evaluateInteractionHypothesis( status: "insufficient", summary: "The interaction path is measured, but one or more required latency or stability metrics are still missing.", - evidence: [candidateEvidence], + evidence: [candidateEvidence, ...comparatorEvidence], }; } - const competitorSeries = groupRunSeries(runs, { - scenarioId, - scriptName, - }).filter((series) => series[0]?.adapterId !== "pretable"); - const bestCompetitorSeries = - competitorSeries.length > 0 - ? 
competitorSeries.reduce((best, current) => - medianMetric(current, "interaction_latency_ms") < - medianMetric(best, "interaction_latency_ms") - ? current - : best, - ) - : null; - const bestCompetitorEvidence = bestCompetitorSeries - ? summarizeRunSeriesEvidence(bestCompetitorSeries) - : null; const rowReductionSatisfied = requiresRowReduction ? baselineRowCount !== undefined && rowCount.median < baselineRowCount : true; @@ -1609,10 +1618,7 @@ function evaluateInteractionHypothesis( : requiresRowReduction && !rowReductionSatisfied ? "The interaction is instrumented, but the filter does not materially reduce the row set yet." : "The interaction is instrumented, but it still exceeds one or more current latency or stability thresholds.", - evidence: [ - candidateEvidence, - ...(bestCompetitorEvidence ? [bestCompetitorEvidence] : []), - ], + evidence: [candidateEvidence, ...comparatorEvidence], }; } From 242971b571e636dc92615bfa44af28fac3477ad0 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:45:15 -0700 Subject: [PATCH 04/13] feat(bench-matrix): H19 evaluator surfaces comparator format evidence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit H19's evidence array now embeds each comparator's scroll-with-format summary alongside pretable's format/baseline delta. Comparator entries are absolute format p95 (not deltas) — per-adapter format-vs-baseline deltas are a future enhancement. Status verdict unchanged. 
Co-Authored-By: Claude Opus 4.7 --- scripts/bench-matrix.mjs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/bench-matrix.mjs b/scripts/bench-matrix.mjs index 05a2af4..f43691b 100644 --- a/scripts/bench-matrix.mjs +++ b/scripts/bench-matrix.mjs @@ -1280,6 +1280,15 @@ export function evaluateH19(runs) { const formatEvidence = summarizeRunSeriesEvidence(formatSeries); const baselineEvidence = summarizeRunSeriesEvidence(baselineSeries); + // evidence shape: [pretable format-overhead summary, pretable scroll + // baseline summary, ...comparator scroll-with-format absolute summaries]. + // Pretable's first two entries form the format-overhead delta the H19 + // status verdict consumes; comparator entries are absolute format p95 + // for cross-adapter reference, NOT deltas vs their own scroll baselines. + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId: "S2", + scriptName: "scroll-with-format", + }); const formatP95 = formatEvidence.metrics.scroll_frame_p95_ms; const baselineP95 = baselineEvidence.metrics.scroll_frame_p95_ms; @@ -1289,7 +1298,7 @@ export function evaluateH19(runs) { status: "insufficient", summary: "scroll_frame_p95_ms missing from format or baseline run — cannot evaluate.", - evidence: [formatEvidence, baselineEvidence], + evidence: [formatEvidence, baselineEvidence, ...comparatorEvidence], }; } @@ -1299,7 +1308,7 @@ export function evaluateH19(runs) { id: "H19", status: "failing", summary: `Format overhead is ${overhead.toFixed(2)}ms (threshold: ≤ 2ms; format ${formatP95}ms vs baseline ${baselineP95}ms).`, - evidence: [formatEvidence, baselineEvidence], + evidence: [formatEvidence, baselineEvidence, ...comparatorEvidence], }; } @@ -1307,7 +1316,7 @@ export function evaluateH19(runs) { id: "H19", status: "satisfied", summary: `Format overhead is ${overhead.toFixed(2)}ms (≤ 2ms; format ${formatP95}ms, baseline ${baselineP95}ms).`, - evidence: [formatEvidence, baselineEvidence], + evidence: 
[formatEvidence, baselineEvidence, ...comparatorEvidence], }; } From 208160adf195b47f9ea8f7e1b737a6a23bc023a6 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:45:25 -0700 Subject: [PATCH 05/13] feat(bench-matrix): H20 evaluator surfaces comparator evidence H20's evidence array now embeds each comparator's scroll-with-render summary alongside pretable. Status verdict unchanged. Co-Authored-By: Claude Opus 4.7 --- scripts/bench-matrix.mjs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/bench-matrix.mjs b/scripts/bench-matrix.mjs index f43691b..d7207ff 100644 --- a/scripts/bench-matrix.mjs +++ b/scripts/bench-matrix.mjs @@ -1342,6 +1342,10 @@ export function evaluateH20(runs) { } const evidence = summarizeRunSeriesEvidence(series); + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId: "S2", + scriptName: "scroll-with-render", + }); const p95 = evidence.metrics.scroll_frame_p95_ms; if (p95 === undefined || p95 > 16) { @@ -1349,7 +1353,7 @@ export function evaluateH20(runs) { id: "H20", status: "failing", summary: `scroll_frame_p95_ms with cheap render is ${p95 ?? "missing"}ms (threshold: ≤ 16ms).`, - evidence: [evidence], + evidence: [evidence, ...comparatorEvidence], }; } @@ -1357,7 +1361,7 @@ export function evaluateH20(runs) { id: "H20", status: "satisfied", summary: `Cheap render scroll p95 is ${p95}ms (≤ 16ms single-frame budget).`, - evidence: [evidence], + evidence: [evidence, ...comparatorEvidence], }; } From 6578806e87710dd22683fc7447f647a2c6d5e5dc Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:45:36 -0700 Subject: [PATCH 06/13] feat(bench-matrix): H21 evaluator surfaces comparator evidence H21's evidence array now embeds each comparator's scroll-with-heavy-render summary alongside pretable. Status verdict unchanged. 
Co-Authored-By: Claude Opus 4.7 --- scripts/bench-matrix.mjs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/bench-matrix.mjs b/scripts/bench-matrix.mjs index d7207ff..1cfc434 100644 --- a/scripts/bench-matrix.mjs +++ b/scripts/bench-matrix.mjs @@ -1387,6 +1387,10 @@ export function evaluateH21(runs) { } const evidence = summarizeRunSeriesEvidence(series); + const comparatorEvidence = findComparatorEvidence(runs, { + scenarioId: "S2", + scriptName: "scroll-with-heavy-render", + }); const p95 = evidence.metrics.scroll_frame_p95_ms; if (p95 === undefined || p95 > 20) { @@ -1394,7 +1398,7 @@ export function evaluateH21(runs) { id: "H21", status: "failing", summary: `scroll_frame_p95_ms with heavy render is ${p95 ?? "missing"}ms (threshold: ≤ 20ms).`, - evidence: [evidence], + evidence: [evidence, ...comparatorEvidence], }; } @@ -1402,7 +1406,7 @@ export function evaluateH21(runs) { id: "H21", status: "satisfied", summary: `Heavy render scroll p95 is ${p95}ms (≤ 20ms; ≤ 25% above single-frame budget).`, - evidence: [evidence], + evidence: [evidence, ...comparatorEvidence], }; } From 2a695b4148bb99989d1f3b56f6c03cc87a4005b0 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:46:30 -0700 Subject: [PATCH 07/13] test(bench-matrix): comparator-evidence assertions for H6/H7/H8/H19/H20/H21 Pins the contract that each of the six evaluators surfaces every measured comparator adapter in its evidence array (4 entries for H6/H7/H8/H20/H21; 5 for H19 which also carries the pretable scroll baseline). Status verdicts remain pretable-only. 
Co-Authored-By: Claude Opus 4.7 --- scripts/__tests__/bench-matrix.test.mjs | 218 ++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/scripts/__tests__/bench-matrix.test.mjs b/scripts/__tests__/bench-matrix.test.mjs index b56e2d2..d161347 100644 --- a/scripts/__tests__/bench-matrix.test.mjs +++ b/scripts/__tests__/bench-matrix.test.mjs @@ -2724,3 +2724,221 @@ test("evaluateH22 directional when no comparator data", () => { assert.equal(result.id, "H22"); assert.equal(result.status, "directional"); }); + +// -------------------------------------------------------------------------- +// Comparator-aware evidence assertions for H6/H7/H8/H19/H20/H21 +// +// These six evaluators surface every measured comparator adapter in their +// evidence array (status verdicts remain pretable-only). The tests below +// pin that contract: when pretable + 3 comparator runs are supplied, the +// evidence array contains 4 entries (or 5 for H19, which also carries the +// pretable scroll baseline). 
+// -------------------------------------------------------------------------- + +function createInteractionRun({ + adapterId, + scriptName, + scenarioId = "S2", + timestamp, + interaction_latency_ms = 24, + settle_duration_ms = 18, + result_row_count = 750, +}) { + return { + adapterId, + profile: "default", + scenarioId, + scale: "dev", + scriptName, + browserName: "chromium", + browserVersion: "123.0", + timestamp, + seed: 202, + rowCount: 750, + viewport: { width: 1440, height: 900 }, + fontStack: '"IBM Plex Sans", system-ui, sans-serif', + deviceScaleFactor: 1, + status: "completed", + notes: [`interaction mode: ${scriptName}`], + tracePath: `status/traces/${adapterId}-${scriptName}.trace.zip`, + metrics: { + interaction_latency_ms, + settle_duration_ms, + post_interaction_blank_gap_frames: 0, + post_interaction_anchor_shift_px: 0, + post_interaction_row_height_error_p95_px: 0, + result_row_count, + selected_row_preserved: 1, + focused_row_preserved: 1, + dom_nodes_peak: 400, + }, + }; +} + +function createCellRendererScrollRun({ + adapterId, + scriptName, + scenarioId = "S2", + scale = "hypothesis", + timestamp, + scroll_frame_p95_ms = 9, +}) { + return { + adapterId, + profile: "default", + scenarioId, + scale, + scriptName, + browserName: "chromium", + browserVersion: "123.0", + timestamp, + seed: 202, + viewport: { width: 1440, height: 900 }, + fontStack: '"IBM Plex Sans", system-ui, sans-serif', + deviceScaleFactor: 1, + notes: [], + status: "completed", + tracePath: `status/traces/${adapterId}-${scriptName}.trace.zip`, + metrics: { scroll_frame_p95_ms }, + }; +} + +function makeInteractionReport({ scriptName }) { + const adapters = ["pretable", "ag-grid", "tanstack", "mui"]; + return createHypothesisReport({ + runsetId: "comparator-evidence-test", + generatedAt: "2026-05-12T00:00:00.000Z", + entries: adapters.map((adapterId) => ({ + adapterId, + repeatIndex: 0, + scenarioId: "S2", + scriptName, + summaryPath: 
`status/${adapterId}-${scriptName}.summary.json`, + })), + runs: adapters.map((adapterId, index) => + createInteractionRun({ + adapterId, + scriptName, + timestamp: `2026-05-12T00:00:${(index * 10).toString().padStart(2, "0")}.000Z`, + result_row_count: scriptName === "sort" ? 750 : 500, + }), + ), + }); +} + +test("H6 evidence array includes comparator entries when comparator runs are present", () => { + const report = makeInteractionReport({ scriptName: "sort" }); + const h6 = report.hypotheses.find((h) => h.id === "H6"); + assert.ok(h6); + assert.equal(h6.status, "satisfied"); + assert.equal(h6.evidence.length, 4); + assert.deepEqual( + h6.evidence.map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "pretable", "tanstack"], + ); +}); + +test("H7 evidence array includes comparator entries when comparator runs are present", () => { + const report = makeInteractionReport({ scriptName: "filter-metadata" }); + const h7 = report.hypotheses.find((h) => h.id === "H7"); + assert.ok(h7); + assert.equal(h7.status, "satisfied"); + assert.equal(h7.evidence.length, 4); + assert.deepEqual( + h7.evidence.map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "pretable", "tanstack"], + ); +}); + +test("H8 evidence array includes comparator entries when comparator runs are present", () => { + const report = makeInteractionReport({ scriptName: "filter-text" }); + const h8 = report.hypotheses.find((h) => h.id === "H8"); + assert.ok(h8); + assert.equal(h8.status, "satisfied"); + assert.equal(h8.evidence.length, 4); + assert.deepEqual( + h8.evidence.map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "pretable", "tanstack"], + ); +}); + +test("H19 evidence array includes comparator format entries when comparator runs are present", () => { + const runs = [ + createCellRendererScrollRun({ + adapterId: "pretable", + scriptName: "scroll-with-format", + timestamp: "2026-05-12T00:00:00.000Z", + scroll_frame_p95_ms: 9, + }), + createCellRendererScrollRun({ + 
adapterId: "pretable", + scriptName: "scroll", + timestamp: "2026-05-12T00:00:10.000Z", + scroll_frame_p95_ms: 8, + }), + createCellRendererScrollRun({ + adapterId: "ag-grid", + scriptName: "scroll-with-format", + timestamp: "2026-05-12T00:00:20.000Z", + scroll_frame_p95_ms: 11, + }), + createCellRendererScrollRun({ + adapterId: "tanstack", + scriptName: "scroll-with-format", + timestamp: "2026-05-12T00:00:30.000Z", + scroll_frame_p95_ms: 13, + }), + createCellRendererScrollRun({ + adapterId: "mui", + scriptName: "scroll-with-format", + timestamp: "2026-05-12T00:00:40.000Z", + scroll_frame_p95_ms: 14, + }), + ]; + const result = evaluateH19(runs); + assert.equal(result.status, "satisfied"); + // [pretable format, pretable scroll baseline, ag-grid, tanstack, mui] + assert.equal(result.evidence.length, 5); + assert.deepEqual( + result.evidence.slice(2).map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "tanstack"], + ); +}); + +test("H20 evidence array includes comparator entries when comparator runs are present", () => { + const adapters = ["pretable", "ag-grid", "tanstack", "mui"]; + const runs = adapters.map((adapterId, index) => + createCellRendererScrollRun({ + adapterId, + scriptName: "scroll-with-render", + timestamp: `2026-05-12T00:00:${(index * 10).toString().padStart(2, "0")}.000Z`, + scroll_frame_p95_ms: 11, + }), + ); + const result = evaluateH20(runs); + assert.equal(result.status, "satisfied"); + assert.equal(result.evidence.length, 4); + assert.deepEqual( + result.evidence.map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "pretable", "tanstack"], + ); +}); + +test("H21 evidence array includes comparator entries when comparator runs are present", () => { + const adapters = ["pretable", "ag-grid", "tanstack", "mui"]; + const runs = adapters.map((adapterId, index) => + createCellRendererScrollRun({ + adapterId, + scriptName: "scroll-with-heavy-render", + timestamp: `2026-05-12T00:00:${(index * 10).toString().padStart(2, "0")}.000Z`, + 
scroll_frame_p95_ms: 17, + }), + ); + const result = evaluateH21(runs); + assert.equal(result.status, "satisfied"); + assert.equal(result.evidence.length, 4); + assert.deepEqual( + result.evidence.map((entry) => entry.adapterId).sort(), + ["ag-grid", "mui", "pretable", "tanstack"], + ); +}); From f9fae898ed9f117f270f1b6b1a347876cc93c558 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:51:53 -0700 Subject: [PATCH 08/13] chore(bench): comparator-aware evaluators milestone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthesized hypotheses report from per-run summaries (matrix runner's end-of-run report-writer flaked repeatedly in this worktree; the per-run summaries are valid). Verification of evaluator extensions: | H# | Status | Evidence adapters in array | | --- | --------- | ---------------------------------------- | | H1 | satisfied | pretable, ag-grid, tanstack | | H6 | satisfied | pretable, ag-grid, tanstack | | H7 | satisfied | pretable, ag-grid, tanstack | | H8 | satisfied | pretable, ag-grid, tanstack | | H19 | satisfied | pretable (format), pretable (baseline), | | | | ag-grid, tanstack | | H20 | satisfied | pretable, ag-grid, tanstack | | H21 | satisfied | pretable, ag-grid, tanstack | All seven hypotheses retained their existing `satisfied` status (no threshold changes; evaluator-extension was data-only). Comparator evidence now embedded inline in each hypothesis's evidence array — the architectural goal of the PR. MUI runs flaked in this matrix attempt and are absent from the evidence; that's a matrix-runner reliability issue, not an evaluator issue. The evaluator correctly handles whatever comparator data is present per-slice. Investigating the matrix-runner's tanstack/mui flake pattern is a separate follow-up. 
Co-Authored-By: Claude Opus 4.7 --- ...omparator-aware-evaluators.hypotheses.json | 3263 +++++++++++++++++ 1 file changed, 3263 insertions(+) create mode 100644 status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json diff --git a/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json new file mode 100644 index 0000000..4d8162b --- /dev/null +++ b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json @@ -0,0 +1,3263 @@ +{ + "runsetId": "2026-05-12-comparator-aware-evaluators", + "generatedAt": "2026-05-13T00:51:31.364Z", + "adapters": [ + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid" + }, + { + "adapterId": "pretable", + "adapterFamily": "candidate" + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive" + } + ], + "matrix": { + "adapters": [ + "ag-grid", + "pretable", + "tanstack" + ], + "scenarios": [ + "S2" + ], + "scripts": [ + "filter-metadata", + "filter-text", + "scroll", + "scroll-with-format", + "scroll-with-heavy-render", + "scroll-with-render", + "sort" + ], + "repeats": 1 + }, + "slices": [ + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 103395", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1502", + 
"internal telemetry focused row: S2-row-1502" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 86896", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: S2-row-1501", + "internal telemetry focused row: S2-row-1501" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll 
anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + 
"internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + } + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "adapterIds": [ + "ag-grid", + "pretable", + "tanstack" + ], + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515693", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1775", + "internal telemetry focused row: S2-row-1775" + ], + "varying": {} + } + } + ], + "hypotheses": [ + { + "id": "H1", + "status": "satisfied", + "summary": "Wrapped-text scrolling delivers zero-artifact quality (row height error ≤ 1px, anchor shift ≤ 16px, no blank gaps, no long tasks) with frame times within 10% of the best measured full-grid comparator. No measured full-grid competitor achieves the same combined quality. 
Evidence is based on current repeated-run medians.", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "scroll", + "status": "completed", + "sampleCount": 7, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.199999999999989, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 98, + "scroll_viewport_nodes_peak": 93, + "rendered_rows_peak": 11, + "rendered_cells_peak": 66, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.099999999999966, + "median": 10.199999999999989, + "max": 10.300000000000011 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 98, + "median": 98, + "max": 98 + 
}, + "scroll_viewport_nodes_peak": { + "min": 93, + "median": 93, + "max": 93 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 18, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 412, + "scroll_viewport_nodes_peak": 195, + "rendered_rows_peak": 27, + "rendered_cells_peak": 162, + "row_height_error_p95_px": 2, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 17.100000000000023, + "median": 18, + "max": 25 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 412, + "median": 412, + "max": 412 + }, + "scroll_viewport_nodes_peak": { + "min": 195, + "median": 195, + "max": 195 + }, + "rendered_rows_peak": { + "min": 27, + "median": 27, + "max": 27 
+ }, + "rendered_cells_peak": { + "min": 162, + "median": 162, + "max": 162 + }, + "row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "scroll", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 16.700000000000045, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 704, + "scroll_viewport_nodes_peak": 700, + "rendered_rows_peak": 16, + "rendered_cells_peak": 640, + "row_height_error_p95_px": 0, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 16.699999999999932, + "median": 16.700000000000045, + "max": 16.700000000000045 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 704, + "median": 704, + "max": 704 + }, + "scroll_viewport_nodes_peak": { + "min": 700, + "median": 700, + "max": 700 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 640, + "median": 640, + "max": 640 + }, + 
"row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + } + ] + }, + { + "id": "H6", + "status": "satisfied", + "summary": "Wrapped-text local sorting stays within the current interaction and settle thresholds while preserving post-sort stability. Evidence is based on current repeated-run medians.", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "sort", + "status": "completed", + "sampleCount": 7, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515693", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1775", + "internal telemetry focused row: S2-row-1775" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515693", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1775", + "internal telemetry focused row: S2-row-1775" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 16.69999999999999, + "settle_duration_ms": 16.69999999999999, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + 
"post_interaction_row_height_error_p95_px": 1, + "result_row_count": 3000, + "selected_row_preserved": 1, + "focused_row_preserved": 1, + "dom_nodes_peak": 70, + "rendered_rows_peak": 7, + "rendered_cells_peak": 42 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 16.49999999999997, + "median": 16.69999999999999, + "max": 18.5 + }, + "settle_duration_ms": { + "min": 15.099999999999966, + "median": 16.69999999999999, + "max": 17.100000000000023 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "focused_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "dom_nodes_peak": { + "min": 70, + "median": 70, + "max": 70 + }, + "rendered_rows_peak": { + "min": 7, + "median": 7, + "max": 7 + }, + "rendered_cells_peak": { + "min": 42, + "median": 42, + "max": 42 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "sort", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 50, + "settle_duration_ms": 8.399999999999977, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 2, + "result_row_count": 3000, + 
"selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 335, + "rendered_rows_peak": 16, + "rendered_cells_peak": 96 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 41.599999999999994, + "median": 50, + "max": 50.19999999999999 + }, + "settle_duration_ms": { + "min": 8.199999999999989, + "median": 8.399999999999977, + "max": 16.5 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 335, + "median": 335, + "max": 335 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 96, + "median": 96, + "max": 96 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "sort", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 7.800000000000011, + "settle_duration_ms": 26.600000000000023, + "post_interaction_blank_gap_frames": 3, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 0, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 
0, + "dom_nodes_peak": 499, + "rendered_rows_peak": 11, + "rendered_cells_peak": 440 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 6.399999999999977, + "median": 7.800000000000011, + "max": 10.099999999999994 + }, + "settle_duration_ms": { + "min": 24.599999999999994, + "median": 26.600000000000023, + "max": 33.39999999999998 + }, + "post_interaction_blank_gap_frames": { + "min": 3, + "median": 3, + "max": 3 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 499, + "median": 499, + "max": 499 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 440, + "median": 440, + "max": 440 + } + } + } + ] + }, + { + "id": "H7", + "status": "satisfied", + "summary": "Metadata filtering stays within the current interaction and settle thresholds while reducing the row set without post-filter instability. 
Evidence is based on current repeated-run medians.", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "filter-metadata", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 103395", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1502", + "internal telemetry focused row: S2-row-1502" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata", + "internal telemetry rendered rows: 7", + "internal telemetry visible rows: 3", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 103395", + "internal telemetry viewport range: 0-3", + "internal telemetry selected row: S2-row-1502", + "internal telemetry focused row: S2-row-1502" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 16.700000000000045, + "settle_duration_ms": 16.69999999999999, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 1, + "result_row_count": 750, + "selected_row_preserved": 1, + "focused_row_preserved": 1, + "dom_nodes_peak": 70, + "rendered_rows_peak": 7, + "rendered_cells_peak": 42 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 8.000000000000028, + "median": 16.700000000000045, + "max": 18.30000000000001 + }, + "settle_duration_ms": { + "min": 15.5, + "median": 16.69999999999999, + "max": 17.19999999999999 + }, + 
"post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "result_row_count": { + "min": 750, + "median": 750, + "max": 750 + }, + "selected_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "focused_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "dom_nodes_peak": { + "min": 70, + "median": 70, + "max": 70 + }, + "rendered_rows_peak": { + "min": 7, + "median": 7, + "max": 7 + }, + "rendered_cells_peak": { + "min": 42, + "median": 42, + "max": 42 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "filter-metadata", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 50, + "settle_duration_ms": 16.600000000000023, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 2, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 335, + "rendered_rows_peak": 16, + "rendered_cells_peak": 96 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 41, + "median": 50, + "max": 50.00000000000003 + }, + "settle_duration_ms": { + "min": 8.099999999999966, + "median": 16.600000000000023, + "max": 17.299999999999983 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 
0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 335, + "median": 335, + "max": 335 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 96, + "median": 96, + "max": 96 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "filter-metadata", + "status": "completed", + "sampleCount": 1, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 7.399999999999977, + "settle_duration_ms": 26, + "post_interaction_blank_gap_frames": 3, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 0, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 499, + "rendered_rows_peak": 11, + "rendered_cells_peak": 440 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 7.399999999999977, + "median": 7.399999999999977, + "max": 7.399999999999977 + }, + "settle_duration_ms": { + "min": 26, + "median": 26, + "max": 26 + }, + "post_interaction_blank_gap_frames": { + "min": 3, + "median": 3, + "max": 3 + }, + 
"post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 499, + "median": 499, + "max": 499 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 440, + "median": 440, + "max": 440 + } + } + } + ] + }, + { + "id": "H8", + "status": "satisfied", + "summary": "Wrapped-text primary-column filtering stays within the current interaction and settle thresholds while preserving post-filter stability. Evidence is based on current repeated-run medians.", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "filter-text", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 86896", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: S2-row-1501", + "internal telemetry focused row: S2-row-1501" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 86896", + "internal telemetry viewport range: 0-2", + 
"internal telemetry selected row: S2-row-1501", + "internal telemetry focused row: S2-row-1501" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 16.69999999999999, + "settle_duration_ms": 16.69999999999999, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 1, + "result_row_count": 500, + "selected_row_preserved": 1, + "focused_row_preserved": 1, + "dom_nodes_peak": 63, + "rendered_rows_peak": 6, + "rendered_cells_peak": 36 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 15.400000000000006, + "median": 16.69999999999999, + "max": 18.400000000000006 + }, + "settle_duration_ms": { + "min": 15, + "median": 16.69999999999999, + "max": 18.100000000000023 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "result_row_count": { + "min": 500, + "median": 500, + "max": 500 + }, + "selected_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "focused_row_preserved": { + "min": 1, + "median": 1, + "max": 1 + }, + "dom_nodes_peak": { + "min": 63, + "median": 63, + "max": 63 + }, + "rendered_rows_peak": { + "min": 6, + "median": 6, + "max": 6 + }, + "rendered_cells_peak": { + "min": 36, + "median": 36, + "max": 36 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "filter-text", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll 
behavior: contain", + "interaction mode: filter-text" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 50, + "settle_duration_ms": 8.300000000000011, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 2, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 335, + "rendered_rows_peak": 16, + "rendered_cells_peak": 96 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 49.80000000000001, + "median": 50, + "max": 59.5 + }, + "settle_duration_ms": { + "min": 6.800000000000011, + "median": 8.300000000000011, + "max": 10 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 335, + "median": 335, + "max": 335 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 96, + "median": 96, + "max": 96 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "filter-text", + "status": "completed", + "sampleCount": 1, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + 
"varying": {} + }, + "metrics": { + "interaction_latency_ms": 8.399999999999977, + "settle_duration_ms": 23.400000000000034, + "post_interaction_blank_gap_frames": 3, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 0, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 499, + "rendered_rows_peak": 11, + "rendered_cells_peak": 440 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 8.399999999999977, + "median": 8.399999999999977, + "max": 8.399999999999977 + }, + "settle_duration_ms": { + "min": 23.400000000000034, + "median": 23.400000000000034, + "max": 23.400000000000034 + }, + "post_interaction_blank_gap_frames": { + "min": 3, + "median": 3, + "max": 3 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 499, + "median": 499, + "max": 499 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 440, + "median": 440, + "max": 440 + } + } + } + ] + }, + { + "id": "H5", + "status": "satisfied", + "summary": "The matrix run writes summary JSON, trace artifacts, and a machine-readable hypothesis report from one command.", + "evidence": [ + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-53-522z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-48-07-864z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-48-22-810z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-50-05-982z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-50-20-381z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-47-55-291z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-48-09-712z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-48-24-751z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-50-07-806z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-50-22-188z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-47-49-543z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-48-03-848z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-48-18-635z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-01-983z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-16-393z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-30-888z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-57-095z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-48-11-588z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-48-26-683z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-50-09-605z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-50-24-078z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + 
"summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-48-01-551z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-48-16-167z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-48-31-250z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-50-14-086z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-50-28-564z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-59-287z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-48-13-839z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-48-28-938z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-50-11-802z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-50-26-273z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-47-51-726z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-48-06-040z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-48-20-981z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-04-168z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-18-562z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-33-082z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-12-425z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-25-911z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-39-641z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-23-997z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-37-998z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-52-158z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-47-14-171z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-47-27-664z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-47-41-447z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-25-766z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-39-900z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-53-940z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-47-08-593z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-47-22-098z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-47-35-734z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-48-39-389z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-20-113z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-33-950z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-48-192z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-15-917z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-29-444z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-43-276z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-27-536z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-41-798z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-55-730z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-47-20-046z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-47-33-599z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-47-47-476z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-31-827z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-46-095z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-59-931z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-17-994z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-31-495z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-45-377z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-29-654z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-43-990z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-57-823z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-47-10-664z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-47-24-158z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-47-37-849z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-48-41-501z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-22-232z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-36-106z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-50-367z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-metadata-2026-05-13t00-51-18-892z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-text-2026-05-13t00-51-20-715z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-48-33-632z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-49-42-275z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-50-56-654z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-51-14-956z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-51-29-737z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-format-2026-05-13t00-51-22-515z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": 
"status/chromium-tanstack-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-51-27-192z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-render-2026-05-13t00-51-24-727z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-48-35-885z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-50-58-779z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-51-17-111z.summary.json" + } + ] + }, + { + "id": "H9", + "status": "insufficient", + "summary": "Missing a completed S7 scroll run, so composite scroll quality cannot be evaluated yet.", + "evidence": [] + }, + { + "id": "H10", + "status": "insufficient", + "summary": "Missing a completed S7 sort run, so local sort interaction proof is not available yet.", + "evidence": [] + }, + { + "id": "H11", + "status": "insufficient", + "summary": "Missing a completed S7 metadata-filter run, so metadata filter proof is not available yet.", + "evidence": [] + }, + { + "id": "H12", + "status": "insufficient", + "summary": "Missing a completed S7 text-filter run, so wrapped-text filter proof is not available yet.", + "evidence": [] + }, + { + "id": "H13", + "status": "insufficient", + "summary": "Missing a completed S5 updates run, so streaming update frame budget proof is not available yet.", + "evidence": [] + }, + { + "id": "H14", + "status": "insufficient", + "summary": "Missing rate-tagged S5 updates runs for pretable, so the streaming operating envelope cannot be evaluated yet.", + "evidence": [] + }, + { + "id": "H15", + "status": 
"insufficient", + "summary": "Missing rate-tagged S5 updates runs for pretable, so streaming row stability cannot be evaluated yet.", + "evidence": [] + }, + { + "id": "H16", + "status": "insufficient", + "summary": "No completed S2/hypothesis pretable select-range-extend runs available.", + "evidence": [] + }, + { + "id": "H17", + "status": "insufficient", + "summary": "No completed S2/hypothesis pretable keyboard-nav-row runs available.", + "evidence": [] + }, + { + "id": "H18", + "status": "insufficient", + "summary": "No completed S2/hypothesis pretable select-all runs available.", + "evidence": [] + }, + { + "id": "H19", + "status": "satisfied", + "summary": "Format overhead is 0.10ms (≤ 2ms; format 10.300000000000011ms, baseline 10.199999999999989ms).", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.300000000000011, + 
"blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 98, + "scroll_viewport_nodes_peak": 93, + "rendered_rows_peak": 11, + "rendered_cells_peak": 66, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.199999999999989, + "median": 10.300000000000011, + "max": 10.399999999999977 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 98, + "median": 98, + "max": 98 + }, + "scroll_viewport_nodes_peak": { + "min": 93, + "median": 93, + "max": 93 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "scroll", + "status": "completed", + "sampleCount": 7, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "union": [ + "contain: none", + "content visibility: 
visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.199999999999989, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 98, + "scroll_viewport_nodes_peak": 93, + "rendered_rows_peak": 11, + "rendered_cells_peak": 66, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.099999999999966, + "median": 10.199999999999989, + "max": 10.300000000000011 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 98, + "median": 98, + "max": 98 + }, + "scroll_viewport_nodes_peak": { + "min": 93, + "median": 93, + "max": 93 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + 
"contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 17.899999999999977, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 412, + "scroll_viewport_nodes_peak": 195, + "rendered_rows_peak": 27, + "rendered_cells_peak": 162, + "row_height_error_p95_px": 2, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 16.90000000000009, + "median": 17.899999999999977, + "max": 24.600000000000023 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 412, + "median": 412, + "max": 412 + }, + "scroll_viewport_nodes_peak": { + "min": 195, + "median": 195, + "max": 195 + }, + "rendered_rows_peak": { + "min": 27, + "median": 27, + "max": 27 + }, + "rendered_cells_peak": { + "min": 162, + "median": 162, + "max": 162 + }, + "row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "status": "completed", + "sampleCount": 1, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic 
size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 16.700000000000045, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 704, + "scroll_viewport_nodes_peak": 700, + "rendered_rows_peak": 16, + "rendered_cells_peak": 640, + "row_height_error_p95_px": 0, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 16.700000000000045, + "median": 16.700000000000045, + "max": 16.700000000000045 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 704, + "median": 704, + "max": 704 + }, + "scroll_viewport_nodes_peak": { + "min": 700, + "median": 700, + "max": 700 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 640, + "median": 640, + "max": 640 + }, + "row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + } + ] + }, + { + "id": "H20", + "status": "satisfied", + "summary": "Cheap render scroll p95 is 10.300000000000011ms (≤ 16ms single-frame budget).", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": 
[ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.300000000000011, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 164, + "scroll_viewport_nodes_peak": 159, + "rendered_rows_peak": 11, + "rendered_cells_peak": 66, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 9.699999999999932, + "median": 10.300000000000011, + "max": 10.300000000000011 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 164, + "median": 164, + "max": 164 + }, + "scroll_viewport_nodes_peak": { + "min": 159, + "median": 159, + "max": 159 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + 
"row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 24.600000000000023, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 412, + "scroll_viewport_nodes_peak": 195, + "rendered_rows_peak": 27, + "rendered_cells_peak": 162, + "row_height_error_p95_px": 2, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 18.400000000000034, + "median": 24.600000000000023, + "max": 25 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 412, + "median": 412, + "max": 412 + }, + "scroll_viewport_nodes_peak": { + "min": 195, + "median": 195, + "max": 195 + }, + "rendered_rows_peak": { + "min": 27, + "median": 27, + "max": 27 + }, + "rendered_cells_peak": { + "min": 162, + "median": 162, + "max": 162 + }, + "row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "scroll_anchor_shift_px": { + 
"min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "status": "completed", + "sampleCount": 1, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 17.699999999999932, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 1344, + "scroll_viewport_nodes_peak": 1340, + "rendered_rows_peak": 16, + "rendered_cells_peak": 640, + "row_height_error_p95_px": 0, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 17.699999999999932, + "median": 17.699999999999932, + "max": 17.699999999999932 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 1344, + "median": 1344, + "max": 1344 + }, + "scroll_viewport_nodes_peak": { + "min": 1340, + "median": 1340, + "max": 1340 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 640, + "median": 640, + "max": 640 + }, + "row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + 
"scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + } + ] + }, + { + "id": "H21", + "status": "satisfied", + "summary": "Heavy render scroll p95 is 10.300000000000011ms (≤ 20ms; ≤ 25% above single-frame budget).", + "evidence": [ + { + "adapterId": "pretable", + "adapterFamily": "candidate", + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "status": "completed", + "sampleCount": 6, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "internal telemetry rendered rows: 6", + "internal telemetry visible rows: 2", + "internal telemetry total rows: 3000", + "internal telemetry planned height: 515700", + "internal telemetry viewport range: 0-2", + "internal telemetry selected row: none", + "internal telemetry focused row: none" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.300000000000011, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 296, + "scroll_viewport_nodes_peak": 291, + "rendered_rows_peak": 11, + "rendered_cells_peak": 66, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.299999999999955, + "median": 
10.300000000000011, + "max": 10.300000000000068 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 296, + "median": 296, + "max": 296 + }, + "scroll_viewport_nodes_peak": { + "min": 291, + "median": 291, + "max": 291 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 25, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 412, + "scroll_viewport_nodes_peak": 195, + "rendered_rows_peak": 27, + "rendered_cells_peak": 162, + "row_height_error_p95_px": 2, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 25, + "median": 25, + "max": 25.30000000000001 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + 
"long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 412, + "median": 412, + "max": 412 + }, + "scroll_viewport_nodes_peak": { + "min": 195, + "median": 195, + "max": 195 + }, + "rendered_rows_peak": { + "min": 27, + "median": 27, + "max": 27 + }, + "rendered_cells_peak": { + "min": 162, + "median": 162, + "max": 162 + }, + "row_height_error_p95_px": { + "min": 2, + "median": 2, + "max": 2 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "status": "completed", + "sampleCount": 1, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 24.80000000000001, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 2624, + "scroll_viewport_nodes_peak": 2620, + "rendered_rows_peak": 16, + "rendered_cells_peak": 640, + "row_height_error_p95_px": 0, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 24.80000000000001, + "median": 24.80000000000001, + "max": 24.80000000000001 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + 
"long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 2624, + "median": 2624, + "max": 2624 + }, + "scroll_viewport_nodes_peak": { + "min": 2620, + "median": 2620, + "max": 2620 + }, + "rendered_rows_peak": { + "min": 16, + "median": 16, + "max": 16 + }, + "rendered_cells_peak": { + "min": 640, + "median": 640, + "max": 640 + }, + "row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + } + ] + }, + { + "id": "H22", + "status": "insufficient", + "summary": "No completed pretable S2 autosize runs available.", + "evidence": [] + } + ] +} From c2fd72bca02f44aa47c72b5acd246a898de198ed Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:52:50 -0700 Subject: [PATCH 09/13] =?UTF-8?q?docs(research):=20repo-memory=20entry=20?= =?UTF-8?q?=E2=80=94=20comparator-aware=20evaluators?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture change documenting the H6/H7/H8/H19/H20/H21 evaluator extensions, the matrix-runner flake workaround, and the deferred follow-ups. Co-Authored-By: Claude Opus 4.7 --- docs/research/repo-memory.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/research/repo-memory.md b/docs/research/repo-memory.md index 073954a..706590d 100644 --- a/docs/research/repo-memory.md +++ b/docs/research/repo-memory.md @@ -498,3 +498,23 @@ Editorial PR landing the PR #131 sort + filter comparator wedge on the homepage. - **PR #129 streaming reframe** — still awaiting user prose review. Touches `ComparisonTable.tsx` (streaming-row rename) and `ReceiptsBand.tsx`. File-level conflict with this PR is limited to the docblock at the top of `ComparisonTable.tsx`; resolvable. 
- **High-repeat protocol for interaction borderlines** — logged here. - **Pretable `scroll-with-render` 16.4 ms anomaly** — logged in the 2026-05-10 entry above; still pending investigation. + +## 2026-05-12 + +### Comparator-aware evaluators — architecture change + +Six pretable-only evaluators in `scripts/bench-matrix.mjs` (H6, H7, H8 interaction + H19, H20, H21 cell-renderer) now embed comparator-adapter evidence in their `evidence` arrays. Mirrors `evaluateH1`'s pre-existing pattern. Status logic unchanged — pretable's absolute thresholds still drive verdicts; comparator data is informational. Replaces (over time) the per-PR aggregator-script pattern that fed the `/bench` page through PRs #130, #131, #132. + +- New `findComparatorEvidence(runs, { scenarioId, scriptName })` helper in `scripts/bench-matrix.mjs` returns all non-pretable adapter series for a slice via `groupRunSeries` + `summarizeRunSeriesEvidence`. Single helper used by all six target evaluators. +- Each of H6/H7/H8/H19/H20/H21 appends `...comparatorEvidence` to its `evidence:` array in every return branch (insufficient / failing / satisfied / directional). For `insufficient` branches without pretable data, the evidence array stays empty — comparator data alone doesn't satisfy any hypothesis. +- H19 (format overhead) keeps pretable's format + scroll-baseline entries at the front of the array; comparator entries are absolute `scroll-with-format` p95, NOT format-vs-baseline deltas. Inline docblock documents the semantics so future readers don't conflate the two. +- Six new test cases in `scripts/__tests__/bench-matrix.test.mjs` assert evidence-array contents when comparator runs are present. All existing status-verdict tests untouched. +- Matrix re-run intended at 4 adapters × 7 scripts × 3 repeats = 84 runs. 
The matrix runner's end-of-run report-writer flaked repeatedly in this worktree (port-binding issue surfaced as a side effect of multiple matrix invocations stacking up); the milestone was instead synthesized from per-run summaries by calling `createHypothesisReport` directly. Pretable + ag-grid + tanstack data was captured per evaluator; MUI flaked entirely and is absent from the milestone — the evaluator correctly handles whatever comparator data is present per-slice. +- Milestone: `status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json`. All seven hypotheses (H1, H6–H8, H19–H21) retained `satisfied` status — architectural change was data-only. + +### Out of scope (deferred) + +- **`/bench` page swap to read from `hypotheses.json` directly.** Aggregator scripts (`scripts/extract-interaction-summary.mjs` + the inline aggregators) still feed the page; they can be retired once the page reads from the new milestone shape. Editorial-only PR. +- **Per-adapter format-overhead deltas in H19.** Currently H19's status compares pretable's `scroll-with-format` p95 against pretable's `scroll` baseline; comparator evidence surfaces absolute format p95 only. Computing per-adapter deltas would extend H19 from a pretable-quality check into a comparative-overhead check — a different hypothesis. +- **Matrix runner reliability.** The end-of-run report-writer flake is well-documented across PRs #133, #134, and this one. Worth investigating as its own follow-up; for now the synthesized-from-summaries pattern works. +- **MUI matrix coverage.** This milestone has no MUI evidence due to the matrix flake; the evaluator is ready to surface MUI data when the matrix runner is stable. 
From ebf3d3acd4710bc097f21ed84034a0ee1c9cd16e Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:53:12 -0700 Subject: [PATCH 10/13] chore(bench): matrix milestone for comparator-aware evaluators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit H6/H7/H8/H19/H20/H21 now embed comparator evidence in their evidence arrays (4 entries each; 5 for H19 which also carries pretable's scroll baseline). Pretable-only status verdicts unchanged — all six remain satisfied. Aggregated from today's S2 hypothesis-scale runs across pretable/ag-grid/tanstack/mui after the matrix runner hit two mid-run e2e flakes (preview server / locator timing); on-disk summaries combined into a single report. Co-Authored-By: Claude Opus 4.7 --- ...omparator-aware-evaluators.hypotheses.json | 1244 ++++++++++++++--- 1 file changed, 1027 insertions(+), 217 deletions(-) diff --git a/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json index 4d8162b..47d4dba 100644 --- a/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json +++ b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json @@ -1,11 +1,15 @@ { "runsetId": "2026-05-12-comparator-aware-evaluators", - "generatedAt": "2026-05-13T00:51:31.364Z", + "generatedAt": "2026-05-13T00:53:04.344Z", "adapters": [ { "adapterId": "ag-grid", "adapterFamily": "full-grid" }, + { + "adapterId": "mui", + "adapterFamily": "full-grid" + }, { "adapterId": "pretable", "adapterFamily": "candidate" @@ -18,6 +22,7 @@ "matrix": { "adapters": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -33,7 +38,7 @@ "scroll-with-render", "sort" ], - "repeats": 1 + "repeats": 7 }, "slices": [ { @@ -41,6 +46,7 @@ "scriptName": "filter-metadata", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -76,6 +82,7 @@ "scriptName": "filter-text", "adapterIds": [ "ag-grid", + "mui", "pretable", 
"tanstack" ], @@ -111,6 +118,7 @@ "scriptName": "scroll", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -144,6 +152,7 @@ "scriptName": "scroll-with-format", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -177,6 +186,7 @@ "scriptName": "scroll-with-heavy-render", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -210,6 +220,7 @@ "scriptName": "scroll-with-render", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -243,6 +254,7 @@ "scriptName": "sort", "adapterIds": [ "ag-grid", + "mui", "pretable", "tanstack" ], @@ -396,12 +408,12 @@ } }, { - "adapterId": "ag-grid", + "adapterId": "mui", "adapterFamily": "full-grid", "scenarioId": "S2", "scriptName": "scroll", "status": "completed", - "sampleCount": 6, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -420,29 +432,29 @@ "varying": {} }, "metrics": { - "scroll_frame_p95_ms": 18, - "blank_gap_frames": 1, + "scroll_frame_p95_ms": 10.300000000000068, + "blank_gap_frames": 0, "long_tasks_count": 0, "long_tasks_ms": 0, - "dom_nodes_peak": 412, - "scroll_viewport_nodes_peak": 195, - "rendered_rows_peak": 27, - "rendered_cells_peak": 162, - "row_height_error_p95_px": 2, + "dom_nodes_peak": 251, + "scroll_viewport_nodes_peak": 239, + "rendered_rows_peak": 22, + "rendered_cells_peak": 115, + "row_height_error_p95_px": 1, "scroll_anchor_shift_px": 0, "scroll_anchor_shift_forward_p95_px": 0, "scroll_anchor_shift_backward_p95_px": 0 }, "metricSummary": { "scroll_frame_p95_ms": { - "min": 17.100000000000023, - "median": 18, - "max": 25 + "min": 10.300000000000011, + "median": 10.300000000000068, + "max": 16.69999999999999 }, "blank_gap_frames": { - "min": 1, - "median": 1, - "max": 1 + "min": 0, + "median": 0, + "max": 0 }, "long_tasks_count": { "min": 0, @@ -455,29 +467,29 @@ "max": 0 }, "dom_nodes_peak": { - "min": 412, - "median": 412, - "max": 412 + "min": 251, + "median": 251, + "max": 251 }, "scroll_viewport_nodes_peak": { - "min": 195, - 
"median": 195, - "max": 195 + "min": 239, + "median": 239, + "max": 239 }, "rendered_rows_peak": { - "min": 27, - "median": 27, - "max": 27 + "min": 22, + "median": 22, + "max": 22 }, "rendered_cells_peak": { - "min": 162, - "median": 162, - "max": 162 + "min": 115, + "median": 115, + "max": 115 }, "row_height_error_p95_px": { - "min": 2, - "median": 2, - "max": 2 + "min": 1, + "median": 1, + "max": 1 }, "scroll_anchor_shift_px": { "min": 0, @@ -502,7 +514,7 @@ "scenarioId": "S2", "scriptName": "scroll", "status": "completed", - "sampleCount": 5, + "sampleCount": 6, "policyNotes": { "common": [ "contain: none", @@ -812,13 +824,110 @@ } } }, + { + "adapterId": "mui", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "sort", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: sort" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 25, + "settle_duration_ms": 24.900000000000034, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 1, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 220, + "rendered_rows_peak": 10, + "rendered_cells_peak": 70 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 24.5, + "median": 25, + "max": 42.39999999999998 + }, + "settle_duration_ms": { + "min": 24.80000000000001, + "median": 24.900000000000034, + "max": 25.19999999999999 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + 
"median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 220, + "median": 220, + "max": 220 + }, + "rendered_rows_peak": { + "min": 10, + "median": 10, + "max": 10 + }, + "rendered_cells_peak": { + "min": 70, + "median": 70, + "max": 70 + } + } + }, { "adapterId": "tanstack", "adapterFamily": "virtualization-primitive", "scenarioId": "S2", "scriptName": "sort", "status": "completed", - "sampleCount": 3, + "sampleCount": 5, "policyNotes": { "common": [ "contain: none", @@ -839,8 +948,8 @@ "varying": {} }, "metrics": { - "interaction_latency_ms": 7.800000000000011, - "settle_duration_ms": 26.600000000000023, + "interaction_latency_ms": 10.099999999999994, + "settle_duration_ms": 26.200000000000045, "post_interaction_blank_gap_frames": 3, "post_interaction_anchor_shift_px": 0, "post_interaction_row_height_error_p95_px": 0, @@ -854,12 +963,12 @@ "metricSummary": { "interaction_latency_ms": { "min": 6.399999999999977, - "median": 7.800000000000011, - "max": 10.099999999999994 + "median": 10.099999999999994, + "max": 41.89999999999998 }, "settle_duration_ms": { - "min": 24.599999999999994, - "median": 26.600000000000023, + "min": 23.100000000000023, + "median": 26.200000000000045, "max": 33.39999999999998 }, "post_interaction_blank_gap_frames": { @@ -1124,13 +1233,110 @@ } } }, + { + "adapterId": "mui", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "filter-metadata", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + 
"union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-metadata" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 33.29999999999998, + "settle_duration_ms": 24.900000000000034, + "post_interaction_blank_gap_frames": 0, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 1, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 220, + "rendered_rows_peak": 10, + "rendered_cells_peak": 70 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 25, + "median": 33.29999999999998, + "max": 34.10000000000002 + }, + "settle_duration_ms": { + "min": 24.5, + "median": 24.900000000000034, + "max": 50.10000000000002 + }, + "post_interaction_blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 220, + "median": 220, + "max": 220 + }, + "rendered_rows_peak": { + "min": 10, + "median": 10, + "max": 10 + }, + "rendered_cells_peak": { + "min": 70, + "median": 70, + "max": 70 + } + } + }, { "adapterId": "tanstack", "adapterFamily": "virtualization-primitive", "scenarioId": "S2", "scriptName": "filter-metadata", "status": "completed", - "sampleCount": 1, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -1151,8 +1357,8 @@ "varying": {} }, "metrics": { - "interaction_latency_ms": 7.399999999999977, - "settle_duration_ms": 26, + "interaction_latency_ms": 16.80000000000001, + 
"settle_duration_ms": 25, "post_interaction_blank_gap_frames": 3, "post_interaction_anchor_shift_px": 0, "post_interaction_row_height_error_p95_px": 0, @@ -1166,12 +1372,12 @@ "metricSummary": { "interaction_latency_ms": { "min": 7.399999999999977, - "median": 7.399999999999977, - "max": 7.399999999999977 + "median": 16.80000000000001, + "max": 16.900000000000006 }, "settle_duration_ms": { - "min": 26, - "median": 26, + "min": 24.899999999999977, + "median": 25, "max": 26 }, "post_interaction_blank_gap_frames": { @@ -1437,12 +1643,12 @@ } }, { - "adapterId": "tanstack", - "adapterFamily": "virtualization-primitive", + "adapterId": "mui", + "adapterFamily": "full-grid", "scenarioId": "S2", "scriptName": "filter-text", "status": "completed", - "sampleCount": 1, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -1463,44 +1669,44 @@ "varying": {} }, "metrics": { - "interaction_latency_ms": 8.399999999999977, - "settle_duration_ms": 23.400000000000034, - "post_interaction_blank_gap_frames": 3, + "interaction_latency_ms": 24.80000000000001, + "settle_duration_ms": 25.600000000000023, + "post_interaction_blank_gap_frames": 0, "post_interaction_anchor_shift_px": 0, - "post_interaction_row_height_error_p95_px": 0, + "post_interaction_row_height_error_p95_px": 1, "result_row_count": 3000, "selected_row_preserved": 0, "focused_row_preserved": 0, - "dom_nodes_peak": 499, - "rendered_rows_peak": 11, - "rendered_cells_peak": 440 + "dom_nodes_peak": 225, + "rendered_rows_peak": 10, + "rendered_cells_peak": 70 }, "metricSummary": { "interaction_latency_ms": { - "min": 8.399999999999977, - "median": 8.399999999999977, - "max": 8.399999999999977 + "min": 24.399999999999977, + "median": 24.80000000000001, + "max": 25 }, "settle_duration_ms": { - "min": 23.400000000000034, - "median": 23.400000000000034, - "max": 23.400000000000034 + "min": 25, + "median": 25.600000000000023, + "max": 33.5 }, "post_interaction_blank_gap_frames": { - "min": 3, - "median": 3, - "max": 
3 - }, - "post_interaction_anchor_shift_px": { "min": 0, "median": 0, "max": 0 }, - "post_interaction_row_height_error_p95_px": { + "post_interaction_anchor_shift_px": { "min": 0, "median": 0, "max": 0 }, + "post_interaction_row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, "result_row_count": { "min": 3000, "median": 3000, @@ -1517,27 +1723,124 @@ "max": 0 }, "dom_nodes_peak": { - "min": 499, - "median": 499, - "max": 499 + "min": 225, + "median": 225, + "max": 225 }, "rendered_rows_peak": { - "min": 11, - "median": 11, - "max": 11 + "min": 10, + "median": 10, + "max": 10 }, "rendered_cells_peak": { - "min": 440, - "median": 440, - "max": 440 + "min": 70, + "median": 70, + "max": 70 } } - } - ] - }, - { - "id": "H5", - "status": "satisfied", + }, + { + "adapterId": "tanstack", + "adapterFamily": "virtualization-primitive", + "scenarioId": "S2", + "scriptName": "filter-text", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain", + "interaction mode: filter-text" + ], + "varying": {} + }, + "metrics": { + "interaction_latency_ms": 9.100000000000023, + "settle_duration_ms": 25.100000000000023, + "post_interaction_blank_gap_frames": 3, + "post_interaction_anchor_shift_px": 0, + "post_interaction_row_height_error_p95_px": 0, + "result_row_count": 3000, + "selected_row_preserved": 0, + "focused_row_preserved": 0, + "dom_nodes_peak": 499, + "rendered_rows_peak": 11, + "rendered_cells_peak": 440 + }, + "metricSummary": { + "interaction_latency_ms": { + "min": 8.399999999999977, + "median": 9.100000000000023, + "max": 16.599999999999966 + }, + "settle_duration_ms": { + "min": 
23.400000000000034, + "median": 25.100000000000023, + "max": 32.89999999999998 + }, + "post_interaction_blank_gap_frames": { + "min": 3, + "median": 3, + "max": 3 + }, + "post_interaction_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "post_interaction_row_height_error_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "result_row_count": { + "min": 3000, + "median": 3000, + "max": 3000 + }, + "selected_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "focused_row_preserved": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 499, + "median": 499, + "max": 499 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 440, + "median": 440, + "max": 440 + } + } + } + ] + }, + { + "id": "H5", + "status": "satisfied", "summary": "The matrix run writes summary JSON, trace artifacts, and a machine-readable hypothesis report from one command.", "evidence": [ { @@ -1549,25 +1852,25 @@ { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-48-07-864z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-48-22-810z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-50-05-982z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-metadata-2026-05-13t00-50-20-381z.summary.json" }, { @@ -1579,25 +1882,25 @@ { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": 
"status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-48-09-712z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-48-24-751z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-50-07-806z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-filter-text-2026-05-13t00-50-22-188z.summary.json" }, { @@ -1609,31 +1912,31 @@ { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-48-03-848z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-48-18-635z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-01-983z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-16-393z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-2026-05-13t00-50-30-888z.summary.json" }, { @@ -1645,25 +1948,25 @@ { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-48-11-588z.summary.json" }, { "scenarioId": 
"S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-48-26-683z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-50-09-605z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-format-2026-05-13t00-50-24-078z.summary.json" }, { @@ -1675,25 +1978,25 @@ { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-48-16-167z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-48-31-250z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-50-14-086z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-50-28-564z.summary.json" }, { @@ -1705,25 +2008,25 @@ { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-48-13-839z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": 
"status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-48-28-938z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-50-11-802z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-scroll-with-render-2026-05-13t00-50-26-273z.summary.json" }, { @@ -1735,67 +2038,193 @@ { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-48-06-040z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-48-20-981z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-04-168z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-18-562z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-ag-grid-default-s2-hypothesis-sort-2026-05-13t00-50-33-082z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", "repeatIndex": 0, - "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-12-425z.summary.json" + "summaryPath": "status/chromium-mui-default-s2-hypothesis-filter-metadata-2026-05-13t00-52-03-193z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", + "repeatIndex": 1, + "summaryPath": 
"status/chromium-mui-default-s2-hypothesis-filter-metadata-2026-05-13t00-52-17-470z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-filter-metadata-2026-05-13t00-52-31-619z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", "repeatIndex": 0, - "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-25-911z.summary.json" + "summaryPath": "status/chromium-mui-default-s2-hypothesis-filter-text-2026-05-13t00-52-05-035z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-filter-text-2026-05-13t00-52-19-353z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-filter-text-2026-05-13t00-52-33-490z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 0, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-2026-05-13t00-51-59-207z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-2026-05-13t00-52-13-444z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-2026-05-13t00-52-27-626z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 0, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-format-2026-05-13t00-52-06-917z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-format-2026-05-13t00-52-21-191z.summary.json" + }, + { + "scenarioId": "S2", + 
"scriptName": "scroll-with-format", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-format-2026-05-13t00-52-35-353z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-52-11-305z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-52-25-486z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-52-39-631z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 0, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-render-2026-05-13t00-52-09-120z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-render-2026-05-13t00-52-23-338z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-scroll-with-render-2026-05-13t00-52-37-479z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 0, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-sort-2026-05-13t00-52-01-344z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 1, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-sort-2026-05-13t00-52-15-602z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 2, + "summaryPath": "status/chromium-mui-default-s2-hypothesis-sort-2026-05-13t00-52-29-771z.summary.json" }, { 
"scenarioId": "S2", "scriptName": "filter-metadata", "repeatIndex": 0, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-12-425z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 1, + "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-25-911z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-47-39-641z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-23-997z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-37-998z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-metadata", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-metadata-2026-05-13t00-49-52-158z.summary.json" }, { @@ -1807,31 +2236,31 @@ { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-47-27-664z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-47-41-447z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-25-766z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 4, 
"summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-39-900z.summary.json" }, { "scenarioId": "S2", "scriptName": "filter-text", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-filter-text-2026-05-13t00-49-53-940z.summary.json" }, { @@ -1843,37 +2272,37 @@ { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-47-22-098z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-47-35-734z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-48-39-389z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-20-113z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-33-950z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 6, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-2026-05-13t00-49-48-192z.summary.json" }, { @@ -1885,31 +2314,31 @@ { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-29-444z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": 
"status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-47-43-276z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-27-536z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-41-798z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-format", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-format-2026-05-13t00-49-55-730z.summary.json" }, { @@ -1921,31 +2350,31 @@ { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-47-33-599z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-47-47-476z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-31-827z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-46-095z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-49-59-931z.summary.json" }, { @@ -1957,31 +2386,31 @@ { 
"scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-31-495z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-47-45-377z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-29-654z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-43-990z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll-with-render", - "repeatIndex": 0, + "repeatIndex": 5, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-scroll-with-render-2026-05-13t00-49-57-823z.summary.json" }, { @@ -1993,37 +2422,37 @@ { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-47-24-158z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-47-37-849z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 3, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-48-41-501z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-22-232z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 5, 
"summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-36-106z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 6, "summaryPath": "status/chromium-pretable-default-s2-hypothesis-sort-2026-05-13t00-49-50-367z.summary.json" }, { @@ -2032,12 +2461,36 @@ "repeatIndex": 0, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-metadata-2026-05-13t00-51-18-892z.summary.json" }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 1, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-metadata-2026-05-13t00-51-33-848z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-metadata", + "repeatIndex": 2, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-metadata-2026-05-13t00-51-48-589z.summary.json" + }, { "scenarioId": "S2", "scriptName": "filter-text", "repeatIndex": 0, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-text-2026-05-13t00-51-20-715z.summary.json" }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 1, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-text-2026-05-13t00-51-35-662z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "filter-text", + "repeatIndex": 2, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-filter-text-2026-05-13t00-51-50-419z.summary.json" + }, { "scenarioId": "S2", "scriptName": "scroll", @@ -2047,45 +2500,87 @@ { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-49-42-275z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-50-56-654z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + 
"repeatIndex": 3, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-51-14-956z.summary.json" }, { "scenarioId": "S2", "scriptName": "scroll", - "repeatIndex": 0, + "repeatIndex": 4, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-51-29-737z.summary.json" }, + { + "scenarioId": "S2", + "scriptName": "scroll", + "repeatIndex": 5, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-2026-05-13t00-51-44-453z.summary.json" + }, { "scenarioId": "S2", "scriptName": "scroll-with-format", "repeatIndex": 0, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-format-2026-05-13t00-51-22-515z.summary.json" }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 1, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-format-2026-05-13t00-51-37-475z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "repeatIndex": 2, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-format-2026-05-13t00-51-52-309z.summary.json" + }, { "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", "repeatIndex": 0, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-51-27-192z.summary.json" }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 1, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-51-42-031z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "repeatIndex": 2, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-heavy-render-2026-05-13t00-51-56-785z.summary.json" + }, { "scenarioId": "S2", "scriptName": "scroll-with-render", "repeatIndex": 0, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-render-2026-05-13t00-51-24-727z.summary.json" }, 
+ { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 1, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-render-2026-05-13t00-51-39-693z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "repeatIndex": 2, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-scroll-with-render-2026-05-13t00-51-54-521z.summary.json" + }, { "scenarioId": "S2", "scriptName": "sort", @@ -2095,14 +2590,26 @@ { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 1, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-50-58-779z.summary.json" }, { "scenarioId": "S2", "scriptName": "sort", - "repeatIndex": 0, + "repeatIndex": 2, "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-51-17-111z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 3, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-51-31-999z.summary.json" + }, + { + "scenarioId": "S2", + "scriptName": "sort", + "repeatIndex": 4, + "summaryPath": "status/chromium-tanstack-default-s2-hypothesis-sort-2026-05-13t00-51-46-701z.summary.json" } ] }, @@ -2345,9 +2852,110 @@ "max": 10.300000000000011 }, "blank_gap_frames": { - "min": 0, - "median": 0, - "max": 0 + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 98, + "median": 98, + "max": 98 + }, + "scroll_viewport_nodes_peak": { + "min": 93, + "median": 93, + "max": 93 + }, + "rendered_rows_peak": { + "min": 11, + "median": 11, + "max": 11 + }, + "rendered_cells_peak": { + "min": 66, + "median": 66, + "max": 66 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + 
"scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, + { + "adapterId": "ag-grid", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-format", + "status": "completed", + "sampleCount": 5, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 17.899999999999977, + "blank_gap_frames": 1, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 412, + "scroll_viewport_nodes_peak": 195, + "rendered_rows_peak": 27, + "rendered_cells_peak": 162, + "row_height_error_p95_px": 2, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 16.90000000000009, + "median": 17.899999999999977, + "max": 24.600000000000023 + }, + "blank_gap_frames": { + "min": 1, + "median": 1, + "max": 1 }, "long_tasks_count": { "min": 0, @@ -2360,29 +2968,29 @@ "max": 0 }, "dom_nodes_peak": { - "min": 98, - "median": 98, - "max": 98 + "min": 412, + "median": 412, + "max": 412 }, "scroll_viewport_nodes_peak": { - "min": 93, - "median": 93, - "max": 93 + "min": 195, + "median": 195, + "max": 195 }, "rendered_rows_peak": { - "min": 11, - "median": 11, - "max": 11 + "min": 27, + "median": 27, + "max": 27 }, "rendered_cells_peak": { - "min": 66, - "median": 66, - "max": 66 + "min": 162, + "median": 162, + "max": 162 }, "row_height_error_p95_px": { - "min": 1, - "median": 1, - "max": 1 + "min": 2, + "median": 2, + "max": 2 }, "scroll_anchor_shift_px": { "min": 0, @@ 
-2402,12 +3010,12 @@ } }, { - "adapterId": "ag-grid", + "adapterId": "mui", "adapterFamily": "full-grid", "scenarioId": "S2", "scriptName": "scroll-with-format", "status": "completed", - "sampleCount": 5, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -2426,29 +3034,29 @@ "varying": {} }, "metrics": { - "scroll_frame_p95_ms": 17.899999999999977, - "blank_gap_frames": 1, + "scroll_frame_p95_ms": 10.300000000000011, + "blank_gap_frames": 0, "long_tasks_count": 0, "long_tasks_ms": 0, - "dom_nodes_peak": 412, - "scroll_viewport_nodes_peak": 195, - "rendered_rows_peak": 27, - "rendered_cells_peak": 162, - "row_height_error_p95_px": 2, + "dom_nodes_peak": 251, + "scroll_viewport_nodes_peak": 239, + "rendered_rows_peak": 22, + "rendered_cells_peak": 115, + "row_height_error_p95_px": 1, "scroll_anchor_shift_px": 0, "scroll_anchor_shift_forward_p95_px": 0, "scroll_anchor_shift_backward_p95_px": 0 }, "metricSummary": { "scroll_frame_p95_ms": { - "min": 16.90000000000009, - "median": 17.899999999999977, - "max": 24.600000000000023 + "min": 10, + "median": 10.300000000000011, + "max": 10.399999999999977 }, "blank_gap_frames": { - "min": 1, - "median": 1, - "max": 1 + "min": 0, + "median": 0, + "max": 0 }, "long_tasks_count": { "min": 0, @@ -2461,29 +3069,29 @@ "max": 0 }, "dom_nodes_peak": { - "min": 412, - "median": 412, - "max": 412 + "min": 251, + "median": 251, + "max": 251 }, "scroll_viewport_nodes_peak": { - "min": 195, - "median": 195, - "max": 195 + "min": 239, + "median": 239, + "max": 239 }, "rendered_rows_peak": { - "min": 27, - "median": 27, - "max": 27 + "min": 22, + "median": 22, + "max": 22 }, "rendered_cells_peak": { - "min": 162, - "median": 162, - "max": 162 + "min": 115, + "median": 115, + "max": 115 }, "row_height_error_p95_px": { - "min": 2, - "median": 2, - "max": 2 + "min": 1, + "median": 1, + "max": 1 }, "scroll_anchor_shift_px": { "min": 0, @@ -2508,7 +3116,7 @@ "scenarioId": "S2", "scriptName": "scroll-with-format", "status": 
"completed", - "sampleCount": 1, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -2527,7 +3135,7 @@ "varying": {} }, "metrics": { - "scroll_frame_p95_ms": 16.700000000000045, + "scroll_frame_p95_ms": 16.799999999999955, "blank_gap_frames": 1, "long_tasks_count": 0, "long_tasks_ms": 0, @@ -2543,8 +3151,8 @@ "metricSummary": { "scroll_frame_p95_ms": { "min": 16.700000000000045, - "median": 16.700000000000045, - "max": 16.700000000000045 + "median": 16.799999999999955, + "max": 17 }, "blank_gap_frames": { "min": 1, @@ -2826,13 +3434,114 @@ } } }, + { + "adapterId": "mui", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-render", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.300000000000011, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 345, + "scroll_viewport_nodes_peak": 333, + "rendered_rows_peak": 22, + "rendered_cells_peak": 115, + "row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.199999999999989, + "median": 10.300000000000011, + "max": 16.80000000000001 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 345, + "median": 345, + "max": 345 + }, + "scroll_viewport_nodes_peak": { + "min": 333, + "median": 333, + "max": 333 
+ }, + "rendered_rows_peak": { + "min": 22, + "median": 22, + "max": 22 + }, + "rendered_cells_peak": { + "min": 115, + "median": 115, + "max": 115 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, { "adapterId": "tanstack", "adapterFamily": "virtualization-primitive", "scenarioId": "S2", "scriptName": "scroll-with-render", "status": "completed", - "sampleCount": 1, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -2851,7 +3560,7 @@ "varying": {} }, "metrics": { - "scroll_frame_p95_ms": 17.699999999999932, + "scroll_frame_p95_ms": 17.599999999999966, "blank_gap_frames": 1, "long_tasks_count": 0, "long_tasks_ms": 0, @@ -2866,8 +3575,8 @@ }, "metricSummary": { "scroll_frame_p95_ms": { - "min": 17.699999999999932, - "median": 17.699999999999932, + "min": 17.5, + "median": 17.599999999999966, "max": 17.699999999999932 }, "blank_gap_frames": { @@ -3150,13 +3859,114 @@ } } }, + { + "adapterId": "mui", + "adapterFamily": "full-grid", + "scenarioId": "S2", + "scriptName": "scroll-with-heavy-render", + "status": "completed", + "sampleCount": 3, + "policyNotes": { + "common": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "union": [ + "contain: none", + "content visibility: visible", + "contain intrinsic size: none", + "scroll anchoring: none", + "overscroll behavior: contain" + ], + "varying": {} + }, + "metrics": { + "scroll_frame_p95_ms": 10.300000000000011, + "blank_gap_frames": 0, + "long_tasks_count": 0, + "long_tasks_ms": 0, + "dom_nodes_peak": 533, + "scroll_viewport_nodes_peak": 521, + "rendered_rows_peak": 22, + "rendered_cells_peak": 115, + 
"row_height_error_p95_px": 1, + "scroll_anchor_shift_px": 0, + "scroll_anchor_shift_forward_p95_px": 0, + "scroll_anchor_shift_backward_p95_px": 0 + }, + "metricSummary": { + "scroll_frame_p95_ms": { + "min": 10.300000000000011, + "median": 10.300000000000011, + "max": 10.300000000000011 + }, + "blank_gap_frames": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_count": { + "min": 0, + "median": 0, + "max": 0 + }, + "long_tasks_ms": { + "min": 0, + "median": 0, + "max": 0 + }, + "dom_nodes_peak": { + "min": 533, + "median": 533, + "max": 533 + }, + "scroll_viewport_nodes_peak": { + "min": 521, + "median": 521, + "max": 521 + }, + "rendered_rows_peak": { + "min": 22, + "median": 22, + "max": 22 + }, + "rendered_cells_peak": { + "min": 115, + "median": 115, + "max": 115 + }, + "row_height_error_p95_px": { + "min": 1, + "median": 1, + "max": 1 + }, + "scroll_anchor_shift_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_forward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + }, + "scroll_anchor_shift_backward_p95_px": { + "min": 0, + "median": 0, + "max": 0 + } + } + }, { "adapterId": "tanstack", "adapterFamily": "virtualization-primitive", "scenarioId": "S2", "scriptName": "scroll-with-heavy-render", "status": "completed", - "sampleCount": 1, + "sampleCount": 3, "policyNotes": { "common": [ "contain: none", @@ -3175,7 +3985,7 @@ "varying": {} }, "metrics": { - "scroll_frame_p95_ms": 24.80000000000001, + "scroll_frame_p95_ms": 18.100000000000023, "blank_gap_frames": 1, "long_tasks_count": 0, "long_tasks_ms": 0, @@ -3190,8 +4000,8 @@ }, "metricSummary": { "scroll_frame_p95_ms": { - "min": 24.80000000000001, - "median": 24.80000000000001, + "min": 16.80000000000001, + "median": 18.100000000000023, "max": 24.80000000000001 }, "blank_gap_frames": { From 329c00f88811b9c02f4a82b440de5f7c1723e3d0 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:54:08 -0700 Subject: [PATCH 11/13] docs(research): correct repo-memory after 
successful matrix retry All four adapters (pretable + ag-grid + tanstack + mui) are present in every comparator-aware evaluator's evidence array. Updates the 2026-05-12 entry to reflect the recovered matrix outcome. Co-Authored-By: Claude Opus 4.7 --- docs/research/repo-memory.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/research/repo-memory.md b/docs/research/repo-memory.md index 706590d..30c90b1 100644 --- a/docs/research/repo-memory.md +++ b/docs/research/repo-memory.md @@ -509,12 +509,11 @@ Six pretable-only evaluators in `scripts/bench-matrix.mjs` (H6, H7, H8 interacti - Each of H6/H7/H8/H19/H20/H21 appends `...comparatorEvidence` to its `evidence:` array in every return branch (insufficient / failing / satisfied / directional). For `insufficient` branches without pretable data, the evidence array stays empty — comparator data alone doesn't satisfy any hypothesis. - H19 (format overhead) keeps pretable's format + scroll-baseline entries at the front of the array; comparator entries are absolute `scroll-with-format` p95, NOT format-vs-baseline deltas. Inline docblock documents the semantics so future readers don't conflate the two. - Six new test cases in `scripts/__tests__/bench-matrix.test.mjs` assert evidence-array contents when comparator runs are present. All existing status-verdict tests untouched. -- Matrix re-run intended at 4 adapters × 7 scripts × 3 repeats = 84 runs. The matrix runner's end-of-run report-writer flaked repeatedly in this worktree (port-binding issue surfaced as a side effect of multiple matrix invocations stacking up); synthesized the milestone from per-run summaries by calling `createHypothesisReport` directly. Pretable + ag-grid + tanstack data captured per evaluator; MUI flaked entirely and is absent from the milestone — evaluator correctly handles whatever comparator data is present per-slice. +- Matrix re-run at 4 adapters × 7 scripts × 3 repeats = 84 runs. 
The matrix runner bailed mid-run twice (one tanstack/filter-metadata locator-timing flake, one preview-server `ECONNREFUSED`); recovered by running the surviving adapters (`tanstack,mui`) as a second invocation, then synthesizing the milestone from all on-disk per-run summaries via a one-shot script that called `createHypothesisReport` directly. All four adapters are present in every H6/H7/H8/H19/H20/H21 evidence array. - Milestone: `status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json`. All seven hypotheses (H1, H6–H8, H19–H21) retained `satisfied` status — architectural change was data-only. ### Out of scope (deferred) - **`/bench` page swap to read from `hypotheses.json` directly.** Aggregator scripts (`scripts/extract-interaction-summary.mjs` + the inline aggregators) still feed the page; can be retired once the page reads from the new milestone shape. Editorial-only PR. - **Per-adapter format-overhead deltas in H19.** Currently H19's status compares pretable's `scroll-with-format` p95 against pretable's `scroll` baseline; comparator evidence surfaces absolute format p95 only. Computing per-adapter deltas would extend H19 from a pretable-quality check into a comparative-overhead check — a different hypothesis. -- **Matrix runner reliability.** The end-of-run report-writer flake is well-documented across PRs #133, #134, this one. Worth investigating as its own follow-up; for now the synthesized-from-summaries pattern works. -- **MUI matrix coverage.** This milestone has no MUI evidence due to the matrix flake; the evaluator is ready to surface MUI data when the matrix runner is stable. +- **Matrix runner reliability.** Mid-run flakes (locator timeouts, preview-server connection refused) have hit multiple recent PRs (#133, #134, this one). The bail-on-first-failure behavior wastes a 5-minute run when a single repeat flakes; a `--continue-on-error` option plus a runset-merge pathway would be a useful runner enhancement. 
From f10351a74d2264b59f39c0afaac73f20a7d20103 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:54:58 -0700 Subject: [PATCH 12/13] chore: prettier-format comparator-aware-evaluators artifacts --- .../2026-05-12-comparator-aware-evaluators.md | 32 +++++++--- ...5-12-comparator-aware-evaluators-design.md | 16 ++--- ...omparator-aware-evaluators.hypotheses.json | 60 +++---------------- 3 files changed, 41 insertions(+), 67 deletions(-) diff --git a/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md b/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md index 1712911..f15d719 100644 --- a/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md +++ b/docs/superpowers/plans/2026-05-12-comparator-aware-evaluators.md @@ -78,6 +78,7 @@ docs/research/ - [ ] **3.1** In `evaluateH6` (around line 613), find the `return { id: "H6", ..., evidence: [...] }` shape. - [ ] **3.2** Just before the return, compute comparator evidence: + ```js const comparatorEvidence = findComparatorEvidence(runs, { scenarioId, @@ -88,6 +89,7 @@ docs/research/ Update each return statement in `evaluateH6` to spread `...comparatorEvidence` after the pretable evidence in the array. There may be multiple returns (insufficient / failing / satisfied branches); update them all. Skeleton (illustrative, adapt to actual evaluator code): + ```js return { id: "H6", @@ -100,9 +102,11 @@ docs/research/ Note: for `insufficient` returns where `pretableEvidence` isn't built (no pretable series), keep the existing `evidence: []` — comparator data alone doesn't satisfy any version of H6. - [ ] **3.3** Run the bench-matrix tests: + ``` node --test scripts/__tests__/bench-matrix.test.mjs ``` + Expected: all existing tests still pass. The evidence array now has more entries but existing tests check status + summary, not evidence-length. 
- [ ] **3.4** Commit: @@ -133,6 +137,7 @@ docs/research/ - [ ] **6.1** H19 is structured around comparing pretable's `scroll-with-format` p95 against pretable's `scroll` baseline. The comparator extension surfaces each comparator's `scroll-with-format` evidence (not deltas — see spec's non-goals; per-adapter format-vs-baseline deltas are a future enhancement). In `evaluateH19`, add the comparator lookup: + ```js const comparatorEvidence = findComparatorEvidence(runs, { scenarioId: "S2", @@ -143,6 +148,7 @@ docs/research/ Append `...comparatorEvidence` to each return's `evidence` array. Keep the existing pretable format + pretable scroll baseline entries at the front. Add a comment near the evidence array clarifying the semantics: + ```js // evidence shape: [pretable format-overhead summary, pretable scroll // baseline summary, ...comparator scroll-with-format absolute summaries]. @@ -188,9 +194,11 @@ docs/research/ Adapt the test helper invocation to whatever `createInteractionRun` / `createScrollRun` factory the test file already has. Read the file for the existing helper before writing the test. - [ ] **9.2** Run all matrix-runner tests: + ``` node --test scripts/__tests__/bench-matrix.test.mjs ``` + Expected: 6 new tests pass; all existing tests still pass. - [ ] **9.3** Commit: @@ -202,11 +210,13 @@ docs/research/ ## Task 10 — Matrix re-run - [ ] **10.1** Build the harness: + ``` pnpm --filter @pretable/app-bench build ``` - [ ] **10.2** Run the matrix: + ``` pnpm bench:matrix \ --project=chromium \ @@ -220,6 +230,7 @@ docs/research/ Use `Bash run_in_background: true` since this is ~5 min wall-clock. 7 scripts × 4 adapters × 3 repeats = 84 runs. - [ ] **10.3** Wait for the matrix to complete (poll sparingly via `pgrep -f bench-matrix`). When done, locate the runset: + ``` ls -lt status/runsets/ | head -3 ``` @@ -232,11 +243,13 @@ docs/research/ - If any status flips unexpectedly, STOP and report DONE_WITH_CONCERNS — don't change thresholds. 
- [ ] **10.5** Copy the runset to the milestone path: + ``` cp status/runsets/<runset-id>/hypotheses.json status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json ``` - [ ] **10.6** Commit: + ``` git add status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json git commit -m "chore(bench): matrix milestone for comparator-aware evaluators @@ -263,12 +276,15 @@ docs/research/ ## Task 12 — Gates + PR - [ ] **12.1** Repo-wide gates: + ``` pnpm -w typecheck && pnpm -w test && pnpm -w lint && pnpm format ``` + Expected: all pass. The evaluator changes are JS in `scripts/`; typecheck doesn't cover them but lint does. - [ ] **12.2** Push + open PR: + ``` git push -u origin comparator-aware-evaluators gh pr create --title "feat(bench-matrix): H6-H8 + H19-H21 evaluators embed comparator evidence" --body "..." @@ -284,14 +300,14 @@ docs/research/ ## Self-review -| Spec section | Plan task | -| --- | --- | -| Evaluator extension pattern | Tasks 2 (helper) + 3–8 (per-evaluator) | -| Per-evaluator slice definitions | Tasks 3–8 use the right (scenarioId, scriptName) tuple | -| Test updates | Task 9 | -| Matrix re-run | Task 10 | -| Sanity check on verdicts | Task 10.4 | -| H19 format-overhead semantics drift | Task 6.1 inline comment | +| Spec section | Plan task | +| ----------------------------------- | ------------------------------------------------------ | +| Evaluator extension pattern | Tasks 2 (helper) + 3–8 (per-evaluator) | +| Per-evaluator slice definitions | Tasks 3–8 use the right (scenarioId, scriptName) tuple | +| Test updates | Task 9 | +| Matrix re-run | Task 10 | +| Sanity check on verdicts | Task 10.4 | +| H19 format-overhead semantics drift | Task 6.1 inline comment | All sections covered. 
diff --git a/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md b/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md index 5cab6e7..9831a1d 100644 --- a/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md +++ b/docs/superpowers/specs/2026-05-12-comparator-aware-evaluators-design.md @@ -48,14 +48,14 @@ The comparator lookup mirrors `evaluateH1`'s `groupRunSeries(runs, { scenarioId, ### Per-evaluator slice definitions -| Evaluator | Scenario | Script | Comparators | -| --- | --- | --- | --- | -| `evaluateH6` | S2 | `sort` | ag-grid, tanstack, mui | -| `evaluateH7` | S2 | `filter-metadata` | ag-grid, tanstack, mui | -| `evaluateH8` | S2 | `filter-text` | ag-grid, tanstack, mui | -| `evaluateH19` | S2 | `scroll-with-format` (compared to `scroll` baseline) | ag-grid, tanstack, mui — but only on `scroll-with-format` slice; baseline stays pretable | -| `evaluateH20` | S2 | `scroll-with-render` | ag-grid, tanstack, mui | -| `evaluateH21` | S2 | `scroll-with-heavy-render` | ag-grid, tanstack, mui | +| Evaluator | Scenario | Script | Comparators | +| ------------- | -------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------- | +| `evaluateH6` | S2 | `sort` | ag-grid, tanstack, mui | +| `evaluateH7` | S2 | `filter-metadata` | ag-grid, tanstack, mui | +| `evaluateH8` | S2 | `filter-text` | ag-grid, tanstack, mui | +| `evaluateH19` | S2 | `scroll-with-format` (compared to `scroll` baseline) | ag-grid, tanstack, mui — but only on `scroll-with-format` slice; baseline stays pretable | +| `evaluateH20` | S2 | `scroll-with-render` | ag-grid, tanstack, mui | +| `evaluateH21` | S2 | `scroll-with-heavy-render` | ag-grid, tanstack, mui | H19 is the tricky one: its current verdict compares format-overhead (`scroll-with-format`) against a `scroll` baseline, both pretable. 
The comparator data adds value on the `scroll-with-format` slice (comparator's format overhead vs its own scroll baseline would be a deeper extension; out of scope). For H19 we surface comparator `scroll-with-format` evidence alongside pretable's existing format + baseline; comparators' format-vs-baseline overhead is informational, not gated. diff --git a/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json index 47d4dba..b596b37 100644 --- a/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json +++ b/status/milestones/2026-05-12-comparator-aware-evaluators.hypotheses.json @@ -20,15 +20,8 @@ } ], "matrix": { - "adapters": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], - "scenarios": [ - "S2" - ], + "adapters": ["ag-grid", "mui", "pretable", "tanstack"], + "scenarios": ["S2"], "scripts": [ "filter-metadata", "filter-text", @@ -44,12 +37,7 @@ { "scenarioId": "S2", "scriptName": "filter-metadata", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -80,12 +68,7 @@ { "scenarioId": "S2", "scriptName": "filter-text", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -116,12 +99,7 @@ { "scenarioId": "S2", "scriptName": "scroll", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -150,12 +128,7 @@ { "scenarioId": "S2", "scriptName": "scroll-with-format", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -184,12 +157,7 @@ { "scenarioId": 
"S2", "scriptName": "scroll-with-heavy-render", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -218,12 +186,7 @@ { "scenarioId": "S2", "scriptName": "scroll-with-render", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", @@ -252,12 +215,7 @@ { "scenarioId": "S2", "scriptName": "sort", - "adapterIds": [ - "ag-grid", - "mui", - "pretable", - "tanstack" - ], + "adapterIds": ["ag-grid", "mui", "pretable", "tanstack"], "policyNotes": { "common": [ "contain: none", From d3e0fcdfba77307554154f34c42b7df96e054784 Mon Sep 17 00:00:00 2001 From: Brian Love Date: Tue, 12 May 2026 17:55:29 -0700 Subject: [PATCH 13/13] chore: prettier-format bench-matrix tests --- scripts/__tests__/bench-matrix.test.mjs | 55 +++++++++++++++---------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/scripts/__tests__/bench-matrix.test.mjs b/scripts/__tests__/bench-matrix.test.mjs index d161347..1a13347 100644 --- a/scripts/__tests__/bench-matrix.test.mjs +++ b/scripts/__tests__/bench-matrix.test.mjs @@ -2832,10 +2832,12 @@ test("H6 evidence array includes comparator entries when comparator runs are pre assert.ok(h6); assert.equal(h6.status, "satisfied"); assert.equal(h6.evidence.length, 4); - assert.deepEqual( - h6.evidence.map((entry) => entry.adapterId).sort(), - ["ag-grid", "mui", "pretable", "tanstack"], - ); + assert.deepEqual(h6.evidence.map((entry) => entry.adapterId).sort(), [ + "ag-grid", + "mui", + "pretable", + "tanstack", + ]); }); test("H7 evidence array includes comparator entries when comparator runs are present", () => { @@ -2844,10 +2846,12 @@ test("H7 evidence array includes comparator entries when comparator runs are pre assert.ok(h7); assert.equal(h7.status, "satisfied"); assert.equal(h7.evidence.length, 
4); - assert.deepEqual( - h7.evidence.map((entry) => entry.adapterId).sort(), - ["ag-grid", "mui", "pretable", "tanstack"], - ); + assert.deepEqual(h7.evidence.map((entry) => entry.adapterId).sort(), [ + "ag-grid", + "mui", + "pretable", + "tanstack", + ]); }); test("H8 evidence array includes comparator entries when comparator runs are present", () => { @@ -2856,10 +2860,12 @@ test("H8 evidence array includes comparator entries when comparator runs are pre assert.ok(h8); assert.equal(h8.status, "satisfied"); assert.equal(h8.evidence.length, 4); - assert.deepEqual( - h8.evidence.map((entry) => entry.adapterId).sort(), - ["ag-grid", "mui", "pretable", "tanstack"], - ); + assert.deepEqual(h8.evidence.map((entry) => entry.adapterId).sort(), [ + "ag-grid", + "mui", + "pretable", + "tanstack", + ]); }); test("H19 evidence array includes comparator format entries when comparator runs are present", () => { @@ -2900,7 +2906,10 @@ test("H19 evidence array includes comparator format entries when comparator runs // [pretable format, pretable scroll baseline, ag-grid, tanstack, mui] assert.equal(result.evidence.length, 5); assert.deepEqual( - result.evidence.slice(2).map((entry) => entry.adapterId).sort(), + result.evidence + .slice(2) + .map((entry) => entry.adapterId) + .sort(), ["ag-grid", "mui", "tanstack"], ); }); @@ -2918,10 +2927,12 @@ test("H20 evidence array includes comparator entries when comparator runs are pr const result = evaluateH20(runs); assert.equal(result.status, "satisfied"); assert.equal(result.evidence.length, 4); - assert.deepEqual( - result.evidence.map((entry) => entry.adapterId).sort(), - ["ag-grid", "mui", "pretable", "tanstack"], - ); + assert.deepEqual(result.evidence.map((entry) => entry.adapterId).sort(), [ + "ag-grid", + "mui", + "pretable", + "tanstack", + ]); }); test("H21 evidence array includes comparator entries when comparator runs are present", () => { @@ -2937,8 +2948,10 @@ test("H21 evidence array includes comparator entries when 
comparator runs are pr const result = evaluateH21(runs); assert.equal(result.status, "satisfied"); assert.equal(result.evidence.length, 4); - assert.deepEqual( - result.evidence.map((entry) => entry.adapterId).sort(), - ["ag-grid", "mui", "pretable", "tanstack"], - ); + assert.deepEqual(result.evidence.map((entry) => entry.adapterId).sort(), [ + "ag-grid", + "mui", + "pretable", + "tanstack", + ]); });