From 08d60d034daf37f8f6b8aaa2776aa9378a39b2a8 Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Fri, 1 May 2026 17:32:31 +0900
Subject: [PATCH] feat: add validation problem demo cases

---
 README.md                                     |  2 +
 docs/portfolio/inferedge_pipeline_status.md   |  2 +
 .../inferedge_portfolio_submission.md         |  1 +
 docs/portfolio/validation_problem_cases.md    | 24 ++++++++
 .../validation_demo/problem_cases/README.md   | 12 ++++
 .../annotation_missing_report.json            | 48 +++++++++++++++
 .../contract_shape_mismatch_report.json       | 48 +++++++++++++++
 .../invalid_detection_structure_report.json   | 61 +++++++++++++++++++
 inferedgelab/studio/routes.py                 | 43 ++++++++++++-
 inferedgelab/studio/static/app.js             | 35 +++++++++++
 inferedgelab/studio/static/index.html         |  9 +--
 inferedgelab/studio/static/style.css          | 30 +++++++++
 tests/test_studio_routes.py                   | 19 ++++--
 tests/test_validation_demo_report.py          | 22 +++++++
 14 files changed, 347 insertions(+), 9 deletions(-)
 create mode 100644 docs/portfolio/validation_problem_cases.md
 create mode 100644 examples/validation_demo/problem_cases/README.md
 create mode 100644 examples/validation_demo/problem_cases/annotation_missing_report.json
 create mode 100644 examples/validation_demo/problem_cases/contract_shape_mismatch_report.json
 create mode 100644 examples/validation_demo/problem_cases/invalid_detection_structure_report.json

diff --git a/README.md b/README.md
index 091da17..3a7724a 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,7 @@ TensorRT Jetson was 4.6x faster than ONNX Runtime CPU in this real image input b
 The benchmark uses end-to-end Runtime latency, not trtexec GPU-only latency.
 The full pipeline portfolio summary is available at [docs/portfolio/inferedge_pipeline_portfolio.md](docs/portfolio/inferedge_pipeline_portfolio.md), and the detailed Runtime comparison report is available at [docs/portfolio/runtime_compare_yolov8n.md](docs/portfolio/runtime_compare_yolov8n.md).
 The YOLOv8 COCO subset accuracy demo is documented in [docs/portfolio/yolov8_coco_subset_evaluation.md](docs/portfolio/yolov8_coco_subset_evaluation.md).
+Validation problem cases are documented in [docs/portfolio/validation_problem_cases.md](docs/portfolio/validation_problem_cases.md).
 
 ## Local Studio Demo Evidence
 
@@ -102,6 +103,7 @@ Verified demo fixture values:
 Studio reports this as a `4.57x` TensorRT speedup for the bundled demo pair.
 AIGuard remains optional in this local Studio path; if Guard evidence is not loaded, the deployment decision explains that the Lab comparison is available but diagnosis evidence is not provided.
 The same demo flow also surfaces a small `yolov8_coco` evaluation report summary: 10 images, 89 ground-truth boxes, mAP@50 `0.1410`, precision `0.2941`, recall `0.1685`, structural validation `passed`.
+It also includes problem-case summaries for annotation-missing review, invalid detection structure blocking, and contract shape mismatch blocking.
 
 ---
 
diff --git a/docs/portfolio/inferedge_pipeline_status.md b/docs/portfolio/inferedge_pipeline_status.md
index 05fa078..ae31c7a 100644
--- a/docs/portfolio/inferedge_pipeline_status.md
+++ b/docs/portfolio/inferedge_pipeline_status.md
@@ -97,6 +97,7 @@ The current cross-repository loop is covered by documentation, fixtures, and smo
 - Lab deployment decision/report evidence smoke for AIGuard worker provenance diagnosis
 - Local Studio local-first workflow UI for viewing Forge -> Runtime -> Lab -> optional AIGuard state, creating in-memory analyze jobs, importing Runtime result JSON, replaying bundled demo evidence, comparing backends, and inspecting Lab-owned deployment decision context
 - YOLOv8 COCO subset evaluation report generated from 10 local images and 89 converted COCO-style person annotations, with mAP@50 0.1410, precision 0.2941, recall 0.1685, and structural validation passed
+- Validation problem case fixtures for annotation-missing review, invalid detection structure blocking, and contract shape mismatch blocking
 
 This means the current product boundary is testable without running the production worker infrastructure.
 
@@ -127,6 +128,7 @@ Demo readiness: `scripts/demo_pipeline_full.sh` now provides a guided end-to-end
 - Guided end-to-end demo entrypoint for portfolio and interview walkthroughs
 - Local Studio at `/studio` for a local-first browser view of Run / Import / Demo Evidence / Compare / Decision / Jetson Helper workflows
 - Contract/preset validation demo with `yolov8_coco`, COCO annotation loading, simplified accuracy metrics, structural validation, and JSON/Markdown/HTML report fixtures
+- Problem-case validation reports that make skipped accuracy, invalid output structure, and contract mismatch visible in Local Studio
 - Cross-repo fixture compatibility across Forge, Runtime, Lab, and AIGuard
 - Rule/evidence based provenance mismatch diagnosis
 
diff --git a/docs/portfolio/inferedge_portfolio_submission.md b/docs/portfolio/inferedge_portfolio_submission.md
index 08c7563..b8b2653 100644
--- a/docs/portfolio/inferedge_portfolio_submission.md
+++ b/docs/portfolio/inferedge_portfolio_submission.md
@@ -113,6 +113,7 @@ Recent validation evidence:
 - Guided demo entrypoint: `scripts/demo_pipeline_full.sh` summarizes the full Forge -> Runtime -> Lab -> optional AIGuard flow and can print the Jetson TensorRT Runtime command without claiming production worker or SaaS readiness.
 - Local Studio demo evidence: `/studio` can load bundled ONNX Runtime CPU and TensorRT Jetson Runtime result fixtures from `examples/studio_demo`, keep the demo pair selectable in Recent jobs while the local server process is alive, and show TensorRT Jetson vs ONNX Runtime CPU comparison in the browser. The fixture-backed evidence records ONNX Runtime CPU at mean 45.4299 ms / p99 49.2128 ms / 22.0119 FPS and TensorRT Jetson at mean 9.9375 ms / p99 15.5231 ms / 100.6293 FPS, a 4.57x TensorRT speedup for this demo pair.
 - YOLOv8 COCO subset evaluation: a 10-image local person-detection subset with 89 ground-truth boxes is converted into a COCO-style annotation fixture and evaluated through the `yolov8_coco` preset. The generated report records mAP@50 0.1410, precision 0.2941, recall 0.1685, and structural validation passed. This is documented as subset workflow evidence, not a full COCO benchmark claim.
+- Validation problem cases: the demo bundle includes annotation-missing, invalid detection structure, and contract shape mismatch reports. These show that InferEdge records review/block evidence explicitly instead of presenting every validation path as successful.
 
 The direct Runtime execution result includes `deployment_decision`. Its `unknown` value is expected before Lab compare/report because the worker response has not yet been compared by Lab.
 
diff --git a/docs/portfolio/validation_problem_cases.md b/docs/portfolio/validation_problem_cases.md
new file mode 100644
index 0000000..a59b9f1
--- /dev/null
+++ b/docs/portfolio/validation_problem_cases.md
@@ -0,0 +1,24 @@
+# Validation Problem Cases
+
+InferEdge does not hide validation failures. These fixtures show how the Lab evidence layer records cases that need review or should be blocked.
+
+## Cases
+
+| Case | Decision Signal | What It Demonstrates |
+|---|---|---|
+| annotation missing | review | Accuracy is intentionally skipped when annotation evidence is unavailable. |
+| invalid detection structure | blocked | Score/bbox structural checks can block malformed detection output. |
+| contract shape mismatch | blocked | Runtime input shape must match the declared `model_contract.json`. |
+
+## Files
+
+- `examples/validation_demo/problem_cases/annotation_missing_report.json`
+- `examples/validation_demo/problem_cases/invalid_detection_structure_report.json`
+- `examples/validation_demo/problem_cases/contract_shape_mismatch_report.json`
+
+## Interpretation
+
+These are deliberately small report fixtures, not production SaaS records.
+They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, and contract mismatches are explicit evidence states.
+
+Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths.
diff --git a/examples/validation_demo/problem_cases/README.md b/examples/validation_demo/problem_cases/README.md
new file mode 100644
index 0000000..621ae7a
--- /dev/null
+++ b/examples/validation_demo/problem_cases/README.md
@@ -0,0 +1,12 @@
+# Validation Problem Demo Cases
+
+These fixtures show how InferEdgeLab records uncomfortable validation evidence instead of hiding it.
+They are intentionally small JSON reports and do not include raw images.
+
+| Case | Signal | Reason |
+|---|---|---|
+| `annotation_missing_report.json` | review | Accuracy is skipped because annotation evidence is not provided. |
+| `invalid_detection_structure_report.json` | blocked | Detection output contains invalid score/bbox structure. |
+| `contract_shape_mismatch_report.json` | blocked | Runtime input shape does not match `model_contract.json`. |
+
+These reports are portfolio fixtures, not production SaaS data.
diff --git a/examples/validation_demo/problem_cases/annotation_missing_report.json b/examples/validation_demo/problem_cases/annotation_missing_report.json
new file mode 100644
index 0000000..9372e67
--- /dev/null
+++ b/examples/validation_demo/problem_cases/annotation_missing_report.json
@@ -0,0 +1,48 @@
+{
+  "report_role": "inferedge-evaluation-report",
+  "generated_at": "2026-05-01T00:00:00Z",
+  "problem_case": "annotation_missing",
+  "preset": {
+    "name": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "runtime_result": {
+    "engine": "onnxruntime",
+    "device": "cpu",
+    "sample_count": 10,
+    "actual_input_shape": [1, 3, 640, 640]
+  },
+  "accuracy": {
+    "status": "skipped",
+    "metrics": {
+      "map50": 0.0,
+      "map50_95": 0.0,
+      "f1_score": 0.0,
+      "precision": 0.0,
+      "recall": 0.0
+    },
+    "reason": "annotation_not_provided"
+  },
+  "contract_validation": {
+    "input_shape": {
+      "status": "passed",
+      "actual_shape": [1, 3, 640, 640],
+      "expected_shape": [1, 3, 640, 640]
+    },
+    "preset": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "structural_validation": {
+    "status": "passed",
+    "checked": {
+      "image_count": 10,
+      "detection_count": 51,
+      "num_classes": 80
+    },
+    "issues": []
+  },
+  "deployment_signal": {
+    "decision": "review",
+    "reason": "Accuracy evaluation was skipped because annotations were not provided."
+  }
+}
diff --git a/examples/validation_demo/problem_cases/contract_shape_mismatch_report.json b/examples/validation_demo/problem_cases/contract_shape_mismatch_report.json
new file mode 100644
index 0000000..7f12152
--- /dev/null
+++ b/examples/validation_demo/problem_cases/contract_shape_mismatch_report.json
@@ -0,0 +1,48 @@
+{
+  "report_role": "inferedge-evaluation-report",
+  "generated_at": "2026-05-01T00:00:00Z",
+  "problem_case": "contract_shape_mismatch",
+  "preset": {
+    "name": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "runtime_result": {
+    "engine": "onnxruntime",
+    "device": "cpu",
+    "sample_count": 1,
+    "actual_input_shape": [1, 3, 640, 640]
+  },
+  "accuracy": {
+    "status": "skipped",
+    "metrics": {
+      "map50": 0.0,
+      "map50_95": 0.0,
+      "f1_score": 0.0,
+      "precision": 0.0,
+      "recall": 0.0
+    },
+    "reason": "model_contract_input_shape_mismatch"
+  },
+  "contract_validation": {
+    "input_shape": {
+      "status": "mismatch",
+      "actual_shape": [1, 3, 640, 640],
+      "expected_shape": [1, 3, 320, 320]
+    },
+    "preset": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "structural_validation": {
+    "status": "passed",
+    "checked": {
+      "image_count": 1,
+      "detection_count": 0,
+      "num_classes": 80
+    },
+    "issues": []
+  },
+  "deployment_signal": {
+    "decision": "blocked",
+    "reason": "Actual runtime input shape does not match the model contract."
+  }
+}
diff --git a/examples/validation_demo/problem_cases/invalid_detection_structure_report.json b/examples/validation_demo/problem_cases/invalid_detection_structure_report.json
new file mode 100644
index 0000000..3ec76d8
--- /dev/null
+++ b/examples/validation_demo/problem_cases/invalid_detection_structure_report.json
@@ -0,0 +1,61 @@
+{
+  "report_role": "inferedge-evaluation-report",
+  "generated_at": "2026-05-01T00:00:00Z",
+  "problem_case": "invalid_detection_structure",
+  "preset": {
+    "name": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "runtime_result": {
+    "engine": "onnxruntime",
+    "device": "cpu",
+    "sample_count": 1,
+    "actual_input_shape": [1, 3, 640, 640]
+  },
+  "accuracy": {
+    "status": "skipped",
+    "metrics": {
+      "map50": 0.0,
+      "map50_95": 0.0,
+      "f1_score": 0.0,
+      "precision": 0.0,
+      "recall": 0.0
+    },
+    "reason": "structural_validation_failed_before_accuracy"
+  },
+  "contract_validation": {
+    "input_shape": {
+      "status": "passed",
+      "actual_shape": [1, 3, 640, 640],
+      "expected_shape": [1, 3, 640, 640]
+    },
+    "preset": "yolov8_coco",
+    "task": "object_detection"
+  },
+  "structural_validation": {
+    "status": "failed",
+    "checked": {
+      "image_count": 1,
+      "detection_count": 1,
+      "num_classes": 80
+    },
+    "issues": [
+      {
+        "image_index": 0,
+        "detection_index": 0,
+        "code": "score_out_of_range",
+        "value": 1.42
+      },
+      {
+        "image_index": 0,
+        "detection_index": 0,
+        "code": "bbox_non_positive_size",
+        "value": [320.0, 320.0, -12.0, 48.0]
+      }
+    ]
+  },
+  "deployment_signal": {
+    "decision": "blocked",
+    "reason": "Structural validation found invalid detection output."
+  }
+}
diff --git a/inferedgelab/studio/routes.py b/inferedgelab/studio/routes.py
index 59196e9..e948463 100644
--- a/inferedgelab/studio/routes.py
+++ b/inferedgelab/studio/routes.py
@@ -22,11 +22,17 @@
 STATIC_DIR = Path(__file__).resolve().parent / "static"
 DEMO_EVIDENCE_DIR = Path(__file__).resolve().parents[2] / "examples" / "studio_demo"
 VALIDATION_DEMO_DIR = Path(__file__).resolve().parents[2] / "examples" / "validation_demo" / "subset"
+VALIDATION_PROBLEM_DIR = Path(__file__).resolve().parents[2] / "examples" / "validation_demo" / "problem_cases"
 DEMO_EVIDENCE_FILES = (
     "onnxruntime_cpu_result.json",
     "tensorrt_jetson_result.json",
 )
 DEMO_EVALUATION_REPORT = "yolov8_coco_subset_evaluation.json"
+DEMO_PROBLEM_REPORTS = (
+    "annotation_missing_report.json",
+    "invalid_detection_structure_report.json",
+    "contract_shape_mismatch_report.json",
+)
 DEMO_JOB_ID = "demo_yolov8n_trt_vs_onnx"
 STATIC_ASSETS = {
     "app.js": "application/javascript",
@@ -159,10 +165,11 @@ def studio_import(request: Request, payload: dict[str, Any] = Body(...)) -> dict
 def studio_demo_evidence(request: Request) -> dict[str, Any]:
     results = [_load_demo_result(file_name) for file_name in DEMO_EVIDENCE_FILES]
     evaluation_report = _load_demo_evaluation_report()
+    problem_cases = _load_demo_problem_cases()
     imported_results = _get_imported_results(request)
     imported_results.extend(results)
     compare = _build_imported_compare_response(results[0], results[1])
-    demo_job = _build_demo_job(results, compare, evaluation_report)
+    demo_job = _build_demo_job(results, compare, evaluation_report, problem_cases)
     _get_demo_jobs(request)[DEMO_JOB_ID] = demo_job
     return {
         "status": "loaded",
@@ -174,6 +181,7 @@ def studio_demo_evidence(request: Request) -> dict[str, Any]:
         "compare_ready": True,
         "compare": compare,
         "evaluation_report": evaluation_report,
+        "problem_cases": problem_cases,
         "deployment_decision": compare["deployment_decision"],
     }
 
@@ -327,10 +335,42 @@ def _load_demo_evaluation_report() -> dict[str, Any]:
     }
 
 
+def _load_demo_problem_cases() -> list[dict[str, Any]]:
+    return list(map(_load_problem_report, DEMO_PROBLEM_REPORTS))  # one summary per fixture, in tuple order
+
+
+def _load_problem_report(file_name: str) -> dict[str, Any]:
+    """Summarize one problem-case report; raise HTTPException(500) if it is unreadable or malformed."""
+    path = VALIDATION_PROBLEM_DIR / file_name
+    try:
+        report = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise HTTPException(status_code=500, detail=f"demo problem report not found: {file_name}") from exc
+    except ValueError as exc:  # JSONDecodeError and UnicodeDecodeError both subclass ValueError
+        raise HTTPException(status_code=500, detail=f"demo problem report is invalid JSON: {file_name}") from exc
+
+    if not isinstance(report, dict):  # valid JSON but not an object: fall through to the schema error
+        report = {}
+    problem_case = report.get("problem_case")
+    deployment_signal = report.get("deployment_signal")
+    if not isinstance(problem_case, str) or not isinstance(deployment_signal, dict):
+        raise HTTPException(status_code=500, detail=f"demo problem report schema error: {file_name}")
+
+    summary: dict[str, Any] = {
+        "problem_case": problem_case,
+        "source": f"examples/validation_demo/problem_cases/{file_name}",
+        "deployment_signal": deployment_signal,
+    }
+    for section in ("accuracy", "structural_validation", "contract_validation"):
+        summary[section] = report[section] if isinstance(report.get(section), dict) else {}
+    return summary
+
+
 def _build_demo_job(
     results: list[dict[str, Any]],
     compare: dict[str, Any],
     evaluation_report: dict[str, Any],
+    problem_cases: list[dict[str, Any]],
 ) -> dict[str, Any]:
     now = _utc_now_iso()
     runtime_result = results[-1] if results else {}
@@ -350,6 +390,7 @@ def _build_demo_job(
             "comparison": compare,
             "deployment_decision": compare["deployment_decision"],
             "evaluation_report": evaluation_report,
+            "problem_cases": problem_cases,
             "summary": compare["judgement"]["summary"],
         },
         "error": None,
diff --git a/inferedgelab/studio/static/app.js b/inferedgelab/studio/static/app.js
index c6cd079..39f2444 100644
--- a/inferedgelab/studio/static/app.js
+++ b/inferedgelab/studio/static/app.js
@@ -29,6 +29,7 @@ let compareData = null;
 let activeDecision = null;
 let importedResult = null;
 let demoEvaluationReport = null;
+let demoProblemCases = [];
 const importedResultsByJobId = {};
 
 function createElement(tagName, className, textContent) {
@@ -367,6 +368,7 @@ async function loadDemoEvidence() {
     const results = Array.isArray(payload.results) ? payload.results : [];
     importedResult = results[results.length - 1] || null;
     demoEvaluationReport = payload.evaluation_report || null;
+    demoProblemCases = Array.isArray(payload.problem_cases) ? payload.problem_cases : [];
     compareData = payload.compare || null;
     selectedJobId = payload.job_id || payload.job?.job_id || selectedJobId;
     selectedJob = payload.job || selectedJob;
@@ -376,6 +378,7 @@ async function loadDemoEvidence() {
     setStatus("#import-status", "Success: demo ONNX Runtime + TensorRT evidence imported.", "success");
     renderImportEvidence({ result: importedResult });
     renderDemoEvaluation(demoEvaluationReport);
+    renderDemoProblemCases(demoProblemCases);
     renderImportedResult();
     await loadJobs(selectedJobId);
     await loadCompare();
@@ -388,6 +391,37 @@ async function loadDemoEvidence() {
   }
 }
 
+// Rebuilds the #demo-problem-cases grid: clears it, then appends one card per problem case.
+function renderDemoProblemCases(problemCases = []) {
+  const grid = document.querySelector("#demo-problem-cases");
+  if (!grid) {
+    return;
+  }
+  grid.replaceChildren();
+  if (!problemCases.length) {
+    return;
+  }
+
+  problemCases.forEach((entry) => {
+    const signal = entry.deployment_signal || {};
+    const accuracyStatus = (entry.accuracy || {}).status || "-";
+    const structureStatus = (entry.structural_validation || {}).status || "-";
+    const contractStatus = (entry.contract_validation?.input_shape || {}).status || "-";
+    const statusLine = `accuracy=${accuracyStatus} / structure=${structureStatus} / contract=${contractStatus}`;
+
+    // The decision tone becomes a card class; style.css defines .problem-case.blocked / .review borders.
+    const card = createElement("article", `problem-case ${decisionTone(signal.decision)}`);
+    const children = [
+      createElement("p", "caption", entry.problem_case || "problem case"),
+      createElement("h4", "", String(signal.decision || "review").toUpperCase()),
+      createElement("p", "body-text", signal.reason || "Validation evidence requires review."),
+      createElement("p", "caption", statusLine),
+    ];
+    card.append(...children);
+    grid.append(card);
+  });
+}
+
 function renderDemoEvaluation(report) {
   const target = document.querySelector("#demo-report-summary");
   if (!target) {
@@ -476,6 +510,7 @@ function renderRunPanel() {
   setState("#jetson-state", "idle");
   setState("#demo-state", "idle");
   renderDemoEvaluation(null);
+  renderDemoProblemCases([]);
 }
 
 function resetTransientInputs() {
diff --git a/inferedgelab/studio/static/index.html b/inferedgelab/studio/static/index.html
index 3594610..818aeda 100644
--- a/inferedgelab/studio/static/index.html
+++ b/inferedgelab/studio/static/index.html
@@ -137,8 +137,8 @@
         }
       }
     </style>
-    <link rel="stylesheet" href="/studio/static/style.css?v=16" />
-    <link rel="stylesheet" href="style.css?v=16" />
+    <link rel="stylesheet" href="/studio/static/style.css?v=17" />
+    <link rel="stylesheet" href="style.css?v=17" />
   </head>
   <body>
     <main class="shell">
@@ -267,6 +267,7 @@ <h3 id="demo-title">Replay validation evidence</h3>
               <button id="load-demo-evidence" type="button">Load Demo Evidence</button>
               <p id="demo-status" class="status-line"></p>
               <div id="demo-report-summary" class="evidence-summary demo-report-summary"></div>
+              <div id="demo-problem-cases" class="problem-case-grid"></div>
             </div>
           </article>
         </div>
@@ -332,7 +333,7 @@ <h2 id="future-title">Future Work</h2>
       </section>
     </main>
 
-    <script src="/studio/static/app.js?v=16" defer></script>
-    <script src="app.js?v=16" defer></script>
+    <script src="/studio/static/app.js?v=17" defer></script>
+    <script src="app.js?v=17" defer></script>
   </body>
 </html>
diff --git a/inferedgelab/studio/static/style.css b/inferedgelab/studio/static/style.css
index 5ebdad2..e30150a 100644
--- a/inferedgelab/studio/static/style.css
+++ b/inferedgelab/studio/static/style.css
@@ -569,6 +569,32 @@ body.file-mode .file-protocol-warning {
   grid-column: 1 / -1;
 }
 
+.problem-case-grid { /* container for the demo problem-case cards (#demo-problem-cases) */
+  display: grid;
+  grid-template-columns: repeat(3, minmax(0, 1fr)); /* three equal columns that may shrink below content width */
+  gap: 8px;
+}
+
+.problem-case { /* one card per problem case, appended by renderDemoProblemCases in app.js */
+  border: 1px solid var(--line);
+  border-radius: 10px;
+  background: rgba(15, 23, 42, 0.78);
+  padding: 10px;
+}
+
+.problem-case h4 { /* card heading: the upper-cased decision text */
+  margin: 4px 0 8px;
+  font-size: 0.95rem;
+}
+
+.problem-case.blocked { /* red-tinted border; NOTE(review): assumes decisionTone() emits "blocked" - confirm */
+  border-color: rgba(239, 68, 68, 0.35);
+}
+
+.problem-case.review { /* amber-tinted border; NOTE(review): assumes decisionTone() emits "review" - confirm */
+  border-color: rgba(234, 179, 8, 0.35);
+}
+
 .metric-name,
 .metric-value {
   display: block;
@@ -726,6 +752,10 @@ body.file-mode .file-protocol-warning {
     grid-template-columns: 1fr;
   }
 
+  .problem-case-grid {
+    grid-template-columns: 1fr;
+  }
+
   .inline-fields,
   .future-heading {
     grid-template-columns: 1fr;
diff --git a/tests/test_studio_routes.py b/tests/test_studio_routes.py
index a9d2436..0440213 100644
--- a/tests/test_studio_routes.py
+++ b/tests/test_studio_routes.py
@@ -60,10 +60,10 @@ def test_studio_route_returns_local_studio_html():
     assert "Import" in html
     assert "Jetson Helper" in html
     assert 'data-critical="studio-dark"' in html
-    assert 'href="/studio/static/style.css?v=16"' in html
-    assert 'href="style.css?v=16"' in html
-    assert 'src="/studio/static/app.js?v=16"' in html
-    assert 'src="app.js?v=16"' in html
+    assert 'href="/studio/static/style.css?v=17"' in html
+    assert 'href="style.css?v=17"' in html
+    assert 'src="/studio/static/app.js?v=17"' in html
+    assert 'src="app.js?v=17"' in html
     assert "file-protocol-warning" in html
     assert 'placeholder="results/latest.json"' in html
     assert 'value="results/latest.json"' not in html
@@ -77,6 +77,7 @@ def test_studio_route_returns_local_studio_html():
     assert "Load Demo Evidence" in html
     assert 'id="demo-state"' in html
     assert 'id="demo-report-summary"' in html
+    assert 'id="demo-problem-cases"' in html
 
 
 def test_studio_static_assets_are_served():
@@ -128,6 +129,7 @@ def test_studio_static_assets_include_redesigned_ui_contracts():
     assert "request record only" in app_text
     assert "loadDemoEvidence" in app_text
     assert "renderDemoEvaluation" in app_text
+    assert "renderDemoProblemCases" in app_text
     assert "/studio/api/demo-evidence" in app_text
     assert "jobDisplayName" in app_text
     assert "jobCaption" in app_text
@@ -145,6 +147,7 @@ def test_studio_static_assets_include_redesigned_ui_contracts():
     assert ".compare-card.improvement" in style_text
     assert ".demo-card" in style_text
     assert ".demo-report-summary" in style_text
+    assert ".problem-case-grid" in style_text
     assert ".compare-stat-list" in style_text
     assert ".job-row .state-pill" in style_text
     assert "flex-wrap: wrap" in style_text
@@ -353,6 +356,13 @@ def test_studio_demo_evidence_loads_compare_ready_pair():
     assert response["evaluation_report"]["accuracy"]["status"] == "evaluated"
     assert response["evaluation_report"]["accuracy"]["metrics"]["map50"] > 0
     assert response["evaluation_report"]["structural_validation"]["status"] == "passed"
+    assert len(response["problem_cases"]) == 3
+    assert {case["problem_case"] for case in response["problem_cases"]} == {
+        "annotation_missing",
+        "invalid_detection_structure",
+        "contract_shape_mismatch",
+    }
+    assert {case["deployment_signal"]["decision"] for case in response["problem_cases"]} == {"review", "blocked"}
     assert compare["status"] == "ok"
     assert compare["base"]["backend_key"] == "onnxruntime__cpu"
     assert compare["new"]["backend_key"] == "tensorrt__jetson"
@@ -378,6 +388,7 @@ def test_studio_demo_evidence_is_listed_and_selectable_as_job():
     assert detail["result"]["comparison"]["base"]["backend_key"] == "onnxruntime__cpu"
     assert detail["result"]["comparison"]["new"]["backend_key"] == "tensorrt__jetson"
     assert detail["result"]["evaluation_report"]["accuracy"]["metrics"]["precision"] > 0
+    assert detail["result"]["problem_cases"][1]["structural_validation"]["status"] == "failed"
 
 
 def test_studio_importing_two_compatible_results_returns_compare_data():
diff --git a/tests/test_validation_demo_report.py b/tests/test_validation_demo_report.py
index 19cad6e..424c712 100644
--- a/tests/test_validation_demo_report.py
+++ b/tests/test_validation_demo_report.py
@@ -22,3 +22,25 @@ def test_yolov8_coco_subset_demo_report_contains_evaluated_accuracy():
     assert round(report["accuracy"]["metrics"]["recall"], 4) == 0.1685
     assert report["structural_validation"]["status"] == "passed"
     assert report["contract_validation"]["input_shape"]["status"] == "passed"
+
+
+def test_validation_problem_case_reports_cover_review_and_blocked_paths():
+    """Problem-case fixtures expose their skipped/failed/mismatch states and review/blocked decisions."""
+    fixtures_dir = Path(__file__).resolve().parents[1] / "examples" / "validation_demo" / "problem_cases"
+    reports = {}
+    for fixture in sorted(fixtures_dir.glob("*_report.json")):
+        reports[fixture.name] = json.loads(fixture.read_text(encoding="utf-8"))
+    annotation_name = "annotation_missing_report.json"
+    structure_name = "invalid_detection_structure_report.json"
+    shape_name = "contract_shape_mismatch_report.json"
+
+    assert set(reports) == {annotation_name, shape_name, structure_name}
+    annotation_missing = reports[annotation_name]
+    invalid_structure = reports[structure_name]
+    shape_mismatch = reports[shape_name]
+    assert annotation_missing["accuracy"]["status"] == "skipped"
+    assert annotation_missing["deployment_signal"]["decision"] == "review"
+    assert invalid_structure["structural_validation"]["status"] == "failed"
+    assert invalid_structure["deployment_signal"]["decision"] == "blocked"
+    assert shape_mismatch["contract_validation"]["input_shape"]["status"] == "mismatch"
+    assert shape_mismatch["deployment_signal"]["decision"] == "blocked"