Merged
2 changes: 2 additions & 0 deletions README.md
@@ -84,6 +84,7 @@ TensorRT Jetson was 4.6x faster than ONNX Runtime CPU in this real image input benchmark.
The benchmark uses end-to-end Runtime latency, not trtexec GPU-only latency.
The full pipeline portfolio summary is available at [docs/portfolio/inferedge_pipeline_portfolio.md](docs/portfolio/inferedge_pipeline_portfolio.md), and the detailed Runtime comparison report is available at [docs/portfolio/runtime_compare_yolov8n.md](docs/portfolio/runtime_compare_yolov8n.md).
The YOLOv8 COCO subset accuracy demo is documented in [docs/portfolio/yolov8_coco_subset_evaluation.md](docs/portfolio/yolov8_coco_subset_evaluation.md).
Validation problem cases are documented in [docs/portfolio/validation_problem_cases.md](docs/portfolio/validation_problem_cases.md).

## Local Studio Demo Evidence

@@ -102,6 +103,7 @@ Verified demo fixture values:
Studio reports this as a `4.57x` TensorRT speedup for the bundled demo pair.
AIGuard remains optional in this local Studio path; if Guard evidence is not loaded, the deployment decision explains that the Lab comparison is available but diagnosis evidence is not provided.
The same demo flow also surfaces a small `yolov8_coco` evaluation report summary: 10 images, 89 ground-truth boxes, mAP@50 `0.1410`, precision `0.2941`, recall `0.1685`, structural validation `passed`.
It also includes problem-case summaries for annotation-missing review, invalid detection structure blocking, and contract shape mismatch blocking.

---

2 changes: 2 additions & 0 deletions docs/portfolio/inferedge_pipeline_status.md
@@ -97,6 +97,7 @@ The current cross-repository loop is covered by documentation, fixtures, and smoke
- Lab deployment decision/report evidence smoke for AIGuard worker provenance diagnosis
- Local Studio local-first workflow UI for viewing Forge -> Runtime -> Lab -> optional AIGuard state, creating in-memory analyze jobs, importing Runtime result JSON, replaying bundled demo evidence, comparing backends, and inspecting Lab-owned deployment decision context
- YOLOv8 COCO subset evaluation report generated from 10 local images and 89 converted COCO-style person annotations, with mAP@50 0.1410, precision 0.2941, recall 0.1685, and structural validation passed
- Validation problem case fixtures for annotation-missing review, invalid detection structure blocking, and contract shape mismatch blocking

This means the current product boundary is testable without running the production worker infrastructure.

@@ -127,6 +128,7 @@ Demo readiness: `scripts/demo_pipeline_full.sh` now provides a guided end-to-end
- Guided end-to-end demo entrypoint for portfolio and interview walkthroughs
- Local Studio at `/studio` for a local-first browser view of Run / Import / Demo Evidence / Compare / Decision / Jetson Helper workflows
- Contract/preset validation demo with `yolov8_coco`, COCO annotation loading, simplified accuracy metrics, structural validation, and JSON/Markdown/HTML report fixtures
- Problem-case validation reports that make skipped accuracy, invalid output structure, and contract mismatch visible in Local Studio
- Cross-repo fixture compatibility across Forge, Runtime, Lab, and AIGuard
- Rule/evidence based provenance mismatch diagnosis

1 change: 1 addition & 0 deletions docs/portfolio/inferedge_portfolio_submission.md
@@ -113,6 +113,7 @@ Recent validation evidence:
- Guided demo entrypoint: `scripts/demo_pipeline_full.sh` summarizes the full Forge -> Runtime -> Lab -> optional AIGuard flow and can print the Jetson TensorRT Runtime command without claiming production worker or SaaS readiness.
- Local Studio demo evidence: `/studio` can load bundled ONNX Runtime CPU and TensorRT Jetson Runtime result fixtures from `examples/studio_demo`, keep the demo pair selectable in Recent jobs while the local server process is alive, and show TensorRT Jetson vs ONNX Runtime CPU comparison in the browser. The fixture-backed evidence records ONNX Runtime CPU at mean 45.4299 ms / p99 49.2128 ms / 22.0119 FPS and TensorRT Jetson at mean 9.9375 ms / p99 15.5231 ms / 100.6293 FPS, a 4.57x TensorRT speedup for this demo pair.
- YOLOv8 COCO subset evaluation: a 10-image local person-detection subset with 89 ground-truth boxes is converted into a COCO-style annotation fixture and evaluated through the `yolov8_coco` preset. The generated report records mAP@50 0.1410, precision 0.2941, recall 0.1685, and structural validation passed. This is documented as subset workflow evidence, not a full COCO benchmark claim.
- Validation problem cases: the demo bundle includes annotation-missing, invalid detection structure, and contract shape mismatch reports. These show that InferEdge records review/block evidence explicitly instead of presenting every validation path as successful.

The direct Runtime execution result includes `deployment_decision`. Its `unknown` value is expected before Lab compare/report because the worker response has not yet been compared by Lab.

24 changes: 24 additions & 0 deletions docs/portfolio/validation_problem_cases.md
@@ -0,0 +1,24 @@
# Validation Problem Cases

InferEdge does not hide validation failures. These fixtures show how the Lab evidence layer records cases that need review or should be blocked.

## Cases

| Case | Decision Signal | What It Demonstrates |
|---|---|---|
| annotation missing | review | Accuracy is intentionally skipped when annotation evidence is unavailable. |
| invalid detection structure | blocked | Score/bbox structural checks can block malformed detection output. |
| contract shape mismatch | blocked | Runtime input shape must match the declared `model_contract.json`. |

## Files

- `examples/validation_demo/problem_cases/annotation_missing_report.json`
- `examples/validation_demo/problem_cases/invalid_detection_structure_report.json`
- `examples/validation_demo/problem_cases/contract_shape_mismatch_report.json`

## Interpretation

These are deliberately small report fixtures, not production SaaS records.
They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, and contract mismatches are explicit evidence states.

Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths.
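The decision signals in these reports can also be consumed programmatically. A minimal sketch, assuming only the fixture schema shown under `problem_cases/` (the helper name `summarize_problem_case` is illustrative, not part of InferEdgeLab):

```python
import json


def summarize_problem_case(report: dict) -> str:
    """Render a one-line summary from a problem-case report fixture."""
    case = report.get("problem_case", "unknown")
    signal = report.get("deployment_signal", {})
    decision = signal.get("decision", "review")
    reason = signal.get("reason", "")
    return f"{case}: {decision.upper()} - {reason}"


# A trimmed version of the contract_shape_mismatch fixture.
report = json.loads("""{
  "problem_case": "contract_shape_mismatch",
  "deployment_signal": {
    "decision": "blocked",
    "reason": "Actual runtime input shape does not match the model contract."
  }
}""")

print(summarize_problem_case(report))
```

A dashboard or CI gate could branch on `deployment_signal.decision` (`review` vs `blocked`) using exactly this kind of lookup.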
12 changes: 12 additions & 0 deletions examples/validation_demo/problem_cases/README.md
@@ -0,0 +1,12 @@
# Validation Problem Demo Cases

These fixtures show how InferEdgeLab records uncomfortable validation evidence instead of hiding it.
They are intentionally small JSON reports and do not include raw images.

| Case | Signal | Reason |
|---|---|---|
| `annotation_missing_report.json` | review | Accuracy is skipped because annotation evidence is not provided. |
| `invalid_detection_structure_report.json` | blocked | Detection output contains invalid score/bbox structure. |
| `contract_shape_mismatch_report.json` | blocked | Runtime input shape does not match `model_contract.json`. |

These reports are portfolio fixtures, not production SaaS data.
@@ -0,0 +1,48 @@
{
"report_role": "inferedge-evaluation-report",
"generated_at": "2026-05-01T00:00:00Z",
"problem_case": "annotation_missing",
"preset": {
"name": "yolov8_coco",
"task": "object_detection"
},
"runtime_result": {
"engine": "onnxruntime",
"device": "cpu",
"sample_count": 10,
"actual_input_shape": [1, 3, 640, 640]
},
"accuracy": {
"status": "skipped",
"metrics": {
"map50": 0.0,
"map50_95": 0.0,
"f1_score": 0.0,
"precision": 0.0,
"recall": 0.0
},
"reason": "annotation_not_provided"
},
"contract_validation": {
"input_shape": {
"status": "passed",
"actual_shape": [1, 3, 640, 640],
"expected_shape": [1, 3, 640, 640]
},
"preset": "yolov8_coco",
"task": "object_detection"
},
"structural_validation": {
"status": "passed",
"checked": {
"image_count": 10,
"detection_count": 51,
"num_classes": 80
},
"issues": []
},
"deployment_signal": {
"decision": "review",
"reason": "Accuracy evaluation was skipped because annotations were not provided."
}
}
@@ -0,0 +1,48 @@
{
"report_role": "inferedge-evaluation-report",
"generated_at": "2026-05-01T00:00:00Z",
"problem_case": "contract_shape_mismatch",
"preset": {
"name": "yolov8_coco",
"task": "object_detection"
},
"runtime_result": {
"engine": "onnxruntime",
"device": "cpu",
"sample_count": 1,
"actual_input_shape": [1, 3, 640, 640]
},
"accuracy": {
"status": "skipped",
"metrics": {
"map50": 0.0,
"map50_95": 0.0,
"f1_score": 0.0,
"precision": 0.0,
"recall": 0.0
},
"reason": "model_contract_input_shape_mismatch"
},
"contract_validation": {
"input_shape": {
"status": "mismatch",
"actual_shape": [1, 3, 640, 640],
"expected_shape": [1, 3, 320, 320]
},
"preset": "yolov8_coco",
"task": "object_detection"
},
"structural_validation": {
"status": "passed",
"checked": {
"image_count": 1,
"detection_count": 0,
"num_classes": 80
},
"issues": []
},
"deployment_signal": {
"decision": "blocked",
"reason": "Actual runtime input shape does not match the model contract."
}
}
@@ -0,0 +1,61 @@
{
"report_role": "inferedge-evaluation-report",
"generated_at": "2026-05-01T00:00:00Z",
"problem_case": "invalid_detection_structure",
"preset": {
"name": "yolov8_coco",
"task": "object_detection"
},
"runtime_result": {
"engine": "onnxruntime",
"device": "cpu",
"sample_count": 1,
"actual_input_shape": [1, 3, 640, 640]
},
"accuracy": {
"status": "skipped",
"metrics": {
"map50": 0.0,
"map50_95": 0.0,
"f1_score": 0.0,
"precision": 0.0,
"recall": 0.0
},
"reason": "structural_validation_failed_before_accuracy"
},
"contract_validation": {
"input_shape": {
"status": "passed",
"actual_shape": [1, 3, 640, 640],
"expected_shape": [1, 3, 640, 640]
},
"preset": "yolov8_coco",
"task": "object_detection"
},
"structural_validation": {
"status": "failed",
"checked": {
"image_count": 1,
"detection_count": 1,
"num_classes": 80
},
"issues": [
{
"image_index": 0,
"detection_index": 0,
"code": "score_out_of_range",
"value": 1.42
},
{
"image_index": 0,
"detection_index": 0,
"code": "bbox_non_positive_size",
"value": [320.0, 320.0, -12.0, 48.0]
}
]
},
"deployment_signal": {
"decision": "blocked",
"reason": "Structural validation found invalid detection output."
}
}
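The two issue codes in this fixture (`score_out_of_range`, `bbox_non_positive_size`) correspond to simple per-detection checks. A hedged sketch of what such checks might look like; the function `check_detection` and its exact rules are inferred from the fixture, not InferEdgeLab's actual implementation:

```python
def check_detection(image_index: int, detection_index: int,
                    score: float, bbox: list) -> list:
    """Return structural issues for one detection; bbox is [x, y, w, h]."""
    issues = []
    # Confidence scores are expected to be normalized into [0, 1].
    if not 0.0 <= score <= 1.0:
        issues.append({
            "image_index": image_index,
            "detection_index": detection_index,
            "code": "score_out_of_range",
            "value": score,
        })
    # A bounding box with non-positive width or height is malformed.
    x, y, w, h = bbox
    if w <= 0 or h <= 0:
        issues.append({
            "image_index": image_index,
            "detection_index": detection_index,
            "code": "bbox_non_positive_size",
            "value": bbox,
        })
    return issues


# The fixture's failing detection: score 1.42, bbox with negative width.
issues = check_detection(0, 0, 1.42, [320.0, 320.0, -12.0, 48.0])
```

Both checks fire for this detection, which is why the fixture records two issues for the same `image_index`/`detection_index` pair.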
43 changes: 42 additions & 1 deletion inferedgelab/studio/routes.py
@@ -22,11 +22,17 @@
STATIC_DIR = Path(__file__).resolve().parent / "static"
DEMO_EVIDENCE_DIR = Path(__file__).resolve().parents[2] / "examples" / "studio_demo"
VALIDATION_DEMO_DIR = Path(__file__).resolve().parents[2] / "examples" / "validation_demo" / "subset"
VALIDATION_PROBLEM_DIR = Path(__file__).resolve().parents[2] / "examples" / "validation_demo" / "problem_cases"
DEMO_EVIDENCE_FILES = (
"onnxruntime_cpu_result.json",
"tensorrt_jetson_result.json",
)
DEMO_EVALUATION_REPORT = "yolov8_coco_subset_evaluation.json"
DEMO_PROBLEM_REPORTS = (
"annotation_missing_report.json",
"invalid_detection_structure_report.json",
"contract_shape_mismatch_report.json",
)
DEMO_JOB_ID = "demo_yolov8n_trt_vs_onnx"
STATIC_ASSETS = {
"app.js": "application/javascript",
@@ -159,10 +165,11 @@ def studio_import(request: Request, payload: dict[str, Any] = Body(...)) -> dict
def studio_demo_evidence(request: Request) -> dict[str, Any]:
results = [_load_demo_result(file_name) for file_name in DEMO_EVIDENCE_FILES]
evaluation_report = _load_demo_evaluation_report()
problem_cases = _load_demo_problem_cases()
imported_results = _get_imported_results(request)
imported_results.extend(results)
compare = _build_imported_compare_response(results[0], results[1])
demo_job = _build_demo_job(results, compare, evaluation_report)
demo_job = _build_demo_job(results, compare, evaluation_report, problem_cases)
_get_demo_jobs(request)[DEMO_JOB_ID] = demo_job
return {
"status": "loaded",
@@ -174,6 +181,7 @@ def studio_demo_evidence(request: Request) -> dict[str, Any]:
"compare_ready": True,
"compare": compare,
"evaluation_report": evaluation_report,
"problem_cases": problem_cases,
"deployment_decision": compare["deployment_decision"],
}

@@ -327,10 +335,42 @@ def _load_demo_evaluation_report() -> dict[str, Any]:
}


def _load_demo_problem_cases() -> list[dict[str, Any]]:
return [_load_problem_report(file_name) for file_name in DEMO_PROBLEM_REPORTS]


def _load_problem_report(file_name: str) -> dict[str, Any]:
path = VALIDATION_PROBLEM_DIR / file_name
try:
report = json.loads(path.read_text(encoding="utf-8"))
except OSError as exc:
raise HTTPException(status_code=500, detail=f"demo problem report not found: {file_name}") from exc
except json.JSONDecodeError as exc:
raise HTTPException(status_code=500, detail=f"demo problem report is invalid JSON: {file_name}") from exc

problem_case = report.get("problem_case") if isinstance(report, dict) else None
deployment_signal = report.get("deployment_signal") if isinstance(report, dict) else None
structural = report.get("structural_validation") if isinstance(report, dict) else None
contract = report.get("contract_validation") if isinstance(report, dict) else None
accuracy = report.get("accuracy") if isinstance(report, dict) else None
if not isinstance(problem_case, str) or not isinstance(deployment_signal, dict):
raise HTTPException(status_code=500, detail=f"demo problem report schema error: {file_name}")

return {
"problem_case": problem_case,
"source": f"examples/validation_demo/problem_cases/{file_name}",
"deployment_signal": deployment_signal,
"accuracy": accuracy if isinstance(accuracy, dict) else {},
"structural_validation": structural if isinstance(structural, dict) else {},
"contract_validation": contract if isinstance(contract, dict) else {},
}


def _build_demo_job(
results: list[dict[str, Any]],
compare: dict[str, Any],
evaluation_report: dict[str, Any],
problem_cases: list[dict[str, Any]],
) -> dict[str, Any]:
now = _utc_now_iso()
runtime_result = results[-1] if results else {}
@@ -350,6 +390,7 @@ def _build_demo_job(
"comparison": compare,
"deployment_decision": compare["deployment_decision"],
"evaluation_report": evaluation_report,
"problem_cases": problem_cases,
"summary": compare["judgement"]["summary"],
},
"error": None,
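`_load_problem_report` above couples file loading, JSON parsing, and schema checks to FastAPI's `HTTPException`. The same load-and-validate pattern can be sketched framework-free; the name `load_problem_report` and the use of `RuntimeError` here are illustrative choices, not InferEdgeLab API:

```python
import json
from pathlib import Path


def load_problem_report(path: Path) -> dict:
    """Load a problem-case report fixture and validate its minimal schema."""
    try:
        report = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        raise RuntimeError(f"demo problem report not found: {path.name}") from exc
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"demo problem report is invalid JSON: {path.name}") from exc
    # Mirror the route's minimal schema check: both keys must exist with the
    # expected types before the report is surfaced to the UI.
    if not isinstance(report.get("problem_case"), str) or \
            not isinstance(report.get("deployment_signal"), dict):
        raise RuntimeError(f"demo problem report schema error: {path.name}")
    return report
```

Keeping the validation in a plain function like this would also make it easy to unit-test the error paths without spinning up the Studio server.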
35 changes: 35 additions & 0 deletions inferedgelab/studio/static/app.js
@@ -29,6 +29,7 @@ let compareData = null;
let activeDecision = null;
let importedResult = null;
let demoEvaluationReport = null;
let demoProblemCases = [];
const importedResultsByJobId = {};

function createElement(tagName, className, textContent) {
@@ -367,6 +368,7 @@ async function loadDemoEvidence() {
const results = Array.isArray(payload.results) ? payload.results : [];
importedResult = results[results.length - 1] || null;
demoEvaluationReport = payload.evaluation_report || null;
demoProblemCases = Array.isArray(payload.problem_cases) ? payload.problem_cases : [];
compareData = payload.compare || null;
selectedJobId = payload.job_id || payload.job?.job_id || selectedJobId;
selectedJob = payload.job || selectedJob;
@@ -376,6 +378,7 @@
setStatus("#import-status", "Success: demo ONNX Runtime + TensorRT evidence imported.", "success");
renderImportEvidence({ result: importedResult });
renderDemoEvaluation(demoEvaluationReport);
renderDemoProblemCases(demoProblemCases);
renderImportedResult();
await loadJobs(selectedJobId);
await loadCompare();
@@ -388,6 +391,37 @@
}
}

function renderDemoProblemCases(problemCases = []) {
const target = document.querySelector("#demo-problem-cases");
if (!target) {
return;
}
target.replaceChildren();

if (!problemCases.length) {
return;
}

problemCases.forEach((problem) => {
const signal = problem.deployment_signal || {};
const structural = problem.structural_validation || {};
const contractShape = problem.contract_validation?.input_shape || {};
const accuracy = problem.accuracy || {};
const card = createElement("article", `problem-case ${decisionTone(signal.decision)}`);
card.append(
createElement("p", "caption", problem.problem_case || "problem case"),
createElement("h4", "", String(signal.decision || "review").toUpperCase()),
createElement("p", "body-text", signal.reason || "Validation evidence requires review."),
createElement(
"p",
"caption",
`accuracy=${accuracy.status || "-"} / structure=${structural.status || "-"} / contract=${contractShape.status || "-"}`,
),
);
target.append(card);
});
}

function renderDemoEvaluation(report) {
const target = document.querySelector("#demo-report-summary");
if (!target) {
@@ -476,6 +510,7 @@ function renderRunPanel() {
setState("#jetson-state", "idle");
setState("#demo-state", "idle");
renderDemoEvaluation(null);
renderDemoProblemCases([]);
}

function resetTransientInputs() {