diff --git a/docs/portfolio/validation_problem_cases.md b/docs/portfolio/validation_problem_cases.md
index a59b9f1..7c6a3b8 100644
--- a/docs/portfolio/validation_problem_cases.md
+++ b/docs/portfolio/validation_problem_cases.md
@@ -9,16 +9,20 @@ InferEdge does not hide validation failures. These fixtures show how the Lab evi
 | annotation missing | review | Accuracy is intentionally skipped when annotation evidence is unavailable. |
 | invalid detection structure | blocked | Score/bbox structural checks can block malformed detection output. |
 | contract shape mismatch | blocked | Runtime input shape must match the declared `model_contract.json`. |
+| latency regression | review_required | A latency regression on the same backend/run_config can force deployment review even when the result is structurally valid. |

 ## Files

 - `examples/validation_demo/problem_cases/annotation_missing_report.json`
 - `examples/validation_demo/problem_cases/invalid_detection_structure_report.json`
 - `examples/validation_demo/problem_cases/contract_shape_mismatch_report.json`
+- `examples/studio_demo/normal_baseline_result.json`
+- `examples/studio_demo/latency_regression_result.json`
+- `examples/studio_demo/latency_regression_summary.json`

 ## Interpretation

 These are deliberately small report fixtures, not production SaaS records.
-They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, and contract mismatches are explicit evidence states.
+They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, contract mismatches, and latency regressions are explicit evidence states.

-Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths.
+Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths. The latency regression case intentionally compares the same TensorRT Jetson FP16 backend and run configuration, so the review signal is about a performance regression, not a backend mismatch.
diff --git a/examples/studio_demo/latency_regression_result.json b/examples/studio_demo/latency_regression_result.json
new file mode 100644
index 0000000..6b425fb
--- /dev/null
+++ b/examples/studio_demo/latency_regression_result.json
@@ -0,0 +1,41 @@
+{
+  "runtime_role": "runtime-result",
+  "model": "yolov8n.onnx",
+  "engine": "tensorrt",
+  "engine_backend": "tensorrt",
+  "device": "jetson",
+  "device_name": "jetson",
+  "precision": "fp16",
+  "batch": 1,
+  "height": 640,
+  "width": 640,
+  "mean_ms": 16.1,
+  "p99_ms": 22.0,
+  "fps_value": 62.112,
+  "success": true,
+  "status": "success",
+  "timestamp": "2026-04-30T12:20:00Z",
+  "compare_key": "yolov8n__b1__h640w640__fp16",
+  "backend_key": "tensorrt__jetson",
+  "system": {
+    "os": "Linux 5.15.148-tegra",
+    "machine": "aarch64"
+  },
+  "run_config": {
+    "warmup": 1,
+    "runs": 5,
+    "mode": "image",
+    "task": "detection",
+    "precision": "fp16"
+  },
+  "accuracy": {},
+  "extra": {
+    "problem_case_role": "latency_regression_new",
+    "input_mode": "image",
+    "input_preprocess": "opencv_bgr_to_rgb_resize_float32_nchw",
+    "manifest_applied": true,
+    "effective_batch": 1,
+    "effective_height": 640,
+    "effective_width": 640
+  }
+}
diff --git a/examples/studio_demo/latency_regression_summary.json b/examples/studio_demo/latency_regression_summary.json
new file mode 100644
index 0000000..fd0f05d
--- /dev/null
+++ b/examples/studio_demo/latency_regression_summary.json
@@ -0,0 +1,79 @@
+{
+  "problem_case": "latency_regression",
+  "problem_case_type": "runtime_latency",
+  "source": "examples/studio_demo/latency_regression_summary.json",
+  "baseline_source": "examples/studio_demo/normal_baseline_result.json",
+  "new_source": "examples/studio_demo/latency_regression_result.json",
+  "policy": {
+    "mean_latency_regression_pct": 10.0,
+    "p99_latency_regression_pct": 20.0,
+    "fps_drop_pct": 10.0,
+    "run_config_mismatch": "invalid_comparison"
+  },
+  "baseline": {
+    "label": "normal baseline",
+    "backend_key": "tensorrt__jetson",
+    "compare_key": "yolov8n__b1__h640w640__fp16",
+    "mean_ms": 14.0,
+    "p99_ms": 15.5,
+    "fps_value": 71.429,
+    "run_config": {
+      "warmup": 1,
+      "runs": 5,
+      "mode": "image",
+      "task": "detection",
+      "precision": "fp16"
+    }
+  },
+  "new": {
+    "label": "regressed result",
+    "backend_key": "tensorrt__jetson",
+    "compare_key": "yolov8n__b1__h640w640__fp16",
+    "mean_ms": 16.1,
+    "p99_ms": 22.0,
+    "fps_value": 62.112,
+    "run_config": {
+      "warmup": 1,
+      "runs": 5,
+      "mode": "image",
+      "task": "detection",
+      "precision": "fp16"
+    }
+  },
+  "latency_checks": {
+    "mean_latency": {
+      "status": "review",
+      "baseline_ms": 14.0,
+      "new_ms": 16.1,
+      "delta_ms": 2.1,
+      "delta_pct": 15.0,
+      "threshold_pct": 10.0
+    },
+    "p99_latency": {
+      "status": "review",
+      "baseline_ms": 15.5,
+      "new_ms": 22.0,
+      "delta_ms": 6.5,
+      "delta_pct": 41.935,
+      "threshold_pct": 20.0
+    },
+    "fps": {
+      "status": "review",
+      "baseline": 71.429,
+      "new": 62.112,
+      "delta": -9.317,
+      "delta_pct": -13.044,
+      "threshold_pct": 10.0
+    },
+    "run_config": {
+      "status": "passed",
+      "mismatch": false
+    }
+  },
+  "deployment_signal": {
+    "decision": "review_required",
+    "reason": "p99 latency regression detected",
+    "notes": "Same backend, precision, and run_config comparison shows the new TensorRT Jetson result exceeds the p99 regression threshold.",
+    "recommended_action": "Review runtime provenance and rerun the same Jetson benchmark before deployment."
+  }
+}
diff --git a/examples/studio_demo/normal_baseline_result.json b/examples/studio_demo/normal_baseline_result.json
new file mode 100644
index 0000000..a3e20f5
--- /dev/null
+++ b/examples/studio_demo/normal_baseline_result.json
@@ -0,0 +1,41 @@
+{
+  "runtime_role": "runtime-result",
+  "model": "yolov8n.onnx",
+  "engine": "tensorrt",
+  "engine_backend": "tensorrt",
+  "device": "jetson",
+  "device_name": "jetson",
+  "precision": "fp16",
+  "batch": 1,
+  "height": 640,
+  "width": 640,
+  "mean_ms": 14.0,
+  "p99_ms": 15.5,
+  "fps_value": 71.429,
+  "success": true,
+  "status": "success",
+  "timestamp": "2026-04-30T12:10:00Z",
+  "compare_key": "yolov8n__b1__h640w640__fp16",
+  "backend_key": "tensorrt__jetson",
+  "system": {
+    "os": "Linux 5.15.148-tegra",
+    "machine": "aarch64"
+  },
+  "run_config": {
+    "warmup": 1,
+    "runs": 5,
+    "mode": "image",
+    "task": "detection",
+    "precision": "fp16"
+  },
+  "accuracy": {},
+  "extra": {
+    "problem_case_role": "latency_regression_baseline",
+    "input_mode": "image",
+    "input_preprocess": "opencv_bgr_to_rgb_resize_float32_nchw",
+    "manifest_applied": true,
+    "effective_batch": 1,
+    "effective_height": 640,
+    "effective_width": 640
+  }
+}
diff --git a/inferedgelab/studio/routes.py b/inferedgelab/studio/routes.py
index e948463..b5a08b6 100644
--- a/inferedgelab/studio/routes.py
+++ b/inferedgelab/studio/routes.py
@@ -33,6 +33,7 @@
     "invalid_detection_structure_report.json",
     "contract_shape_mismatch_report.json",
 )
+LATENCY_REGRESSION_SUMMARY = "latency_regression_summary.json"
 DEMO_JOB_ID = "demo_yolov8n_trt_vs_onnx"
 STATIC_ASSETS = {
     "app.js": "application/javascript",
@@ -336,7 +337,38 @@ def _load_demo_evaluation_report() -> dict[str, Any]:


 def _load_demo_problem_cases() -> list[dict[str, Any]]:
-    return [_load_problem_report(file_name) for file_name in DEMO_PROBLEM_REPORTS]
+    cases = [_load_problem_report(file_name) for file_name in DEMO_PROBLEM_REPORTS]
+    cases.append(_load_latency_regression_summary())
+    return cases
+
+
+def _load_latency_regression_summary() -> dict[str, Any]:
+    path = DEMO_EVIDENCE_DIR / LATENCY_REGRESSION_SUMMARY
+    try:
+        summary = json.loads(path.read_text(encoding="utf-8"))
+    except OSError as exc:
+        raise HTTPException(status_code=500, detail=f"latency regression summary not found: {LATENCY_REGRESSION_SUMMARY}") from exc
+    except json.JSONDecodeError as exc:
+        raise HTTPException(status_code=500, detail=f"latency regression summary is invalid JSON: {LATENCY_REGRESSION_SUMMARY}") from exc
+
+    problem_case = summary.get("problem_case") if isinstance(summary, dict) else None
+    deployment_signal = summary.get("deployment_signal") if isinstance(summary, dict) else None
+    latency_checks = summary.get("latency_checks") if isinstance(summary, dict) else None
+    if not isinstance(problem_case, str) or not isinstance(deployment_signal, dict) or not isinstance(latency_checks, dict):
+        raise HTTPException(status_code=500, detail=f"latency regression summary schema error: {LATENCY_REGRESSION_SUMMARY}")
+
+    return {
+        "problem_case": problem_case,
+        "problem_case_type": summary.get("problem_case_type") or "runtime_latency",
+        "source": f"examples/studio_demo/{LATENCY_REGRESSION_SUMMARY}",
+        "baseline_source": summary.get("baseline_source"),
+        "new_source": summary.get("new_source"),
+        "policy": summary.get("policy") or {},
+        "baseline": summary.get("baseline") or {},
+        "new": summary.get("new") or {},
+        "latency_checks": latency_checks,
+        "deployment_signal": deployment_signal,
+    }


 def _load_problem_report(file_name: str) -> dict[str, Any]:
diff --git a/inferedgelab/studio/static/app.js b/inferedgelab/studio/static/app.js
index 39f2444..5d17ff7 100644
--- a/inferedgelab/studio/static/app.js
+++ b/inferedgelab/studio/static/app.js
@@ -404,24 +404,31 @@ function renderDemoProblemCases(problemCases = []) {

   problemCases.forEach((problem) => {
     const signal = problem.deployment_signal || {};
-    const structural = problem.structural_validation || {};
-    const contractShape = problem.contract_validation?.input_shape || {};
-    const accuracy = problem.accuracy || {};
     const card = createElement("article", `problem-case ${decisionTone(signal.decision)}`);
     card.append(
       createElement("p", "caption", problem.problem_case || "problem case"),
       createElement("h4", "", String(signal.decision || "review").toUpperCase()),
       createElement("p", "body-text", signal.reason || "Validation evidence requires review."),
-      createElement(
-        "p",
-        "caption",
-        `accuracy=${accuracy.status || "-"} / structure=${structural.status || "-"} / contract=${contractShape.status || "-"}`,
-      ),
+      createElement("p", "caption", problemCaseDetail(problem)),
     );
     target.append(card);
   });
 }

+function problemCaseDetail(problem = {}) {
+  if (problem.problem_case_type === "runtime_latency" || problem.latency_checks) {
+    const checks = problem.latency_checks || {};
+    const mean = checks.mean_latency?.delta_pct;
+    const p99 = checks.p99_latency?.delta_pct;
+    const fps = checks.fps?.delta_pct;
+    return `mean=${formatPercent(mean)} / p99=${formatPercent(p99)} / fps=${formatPercent(fps)} / run_config=${checks.run_config?.status || "-"}`;
+  }
+  const structural = problem.structural_validation || {};
+  const contractShape = problem.contract_validation?.input_shape || {};
+  const accuracy = problem.accuracy || {};
+  return `accuracy=${accuracy.status || "-"} / structure=${structural.status || "-"} / contract=${contractShape.status || "-"}`;
+}
+
 function renderDemoEvaluation(report) {
   const target = document.querySelector("#demo-report-summary");
   if (!target) {
@@ -1037,6 +1044,13 @@ function formatNumber(value) {
   return number.toFixed(3).replace(/\.?0+$/, "");
 }

+function formatPercent(value) {
+  if (value === undefined || value === null) {
+    return "-";
+  }
+  return `${formatNumber(value)}%`;
+}
+
 function formatValue(value) {
   return displayValue(value);
 }
diff --git a/tests/test_studio_routes.py b/tests/test_studio_routes.py
index 0440213..6aaeb95 100644
--- a/tests/test_studio_routes.py
+++ b/tests/test_studio_routes.py
@@ -356,13 +356,21 @@ def test_studio_demo_evidence_loads_compare_ready_pair():
     assert response["evaluation_report"]["accuracy"]["status"] == "evaluated"
     assert response["evaluation_report"]["accuracy"]["metrics"]["map50"] > 0
     assert response["evaluation_report"]["structural_validation"]["status"] == "passed"
-    assert len(response["problem_cases"]) == 3
+    assert len(response["problem_cases"]) == 4
     assert {case["problem_case"] for case in response["problem_cases"]} == {
         "annotation_missing",
         "invalid_detection_structure",
         "contract_shape_mismatch",
+        "latency_regression",
     }
-    assert {case["deployment_signal"]["decision"] for case in response["problem_cases"]} == {"review", "blocked"}
+    assert {case["deployment_signal"]["decision"] for case in response["problem_cases"]} == {
+        "review",
+        "blocked",
+        "review_required",
+    }
+    latency_case = next(case for case in response["problem_cases"] if case["problem_case"] == "latency_regression")
+    assert latency_case["latency_checks"]["p99_latency"]["delta_pct"] > 20
+    assert latency_case["deployment_signal"]["reason"] == "p99 latency regression detected"
     assert compare["status"] == "ok"
     assert compare["base"]["backend_key"] == "onnxruntime__cpu"
     assert compare["new"]["backend_key"] == "tensorrt__jetson"
@@ -389,6 +397,7 @@ def test_studio_demo_evidence_is_listed_and_selectable_as_job():
     assert detail["result"]["comparison"]["new"]["backend_key"] == "tensorrt__jetson"
     assert detail["result"]["evaluation_report"]["accuracy"]["metrics"]["precision"] > 0
     assert detail["result"]["problem_cases"][1]["structural_validation"]["status"] == "failed"
+    assert detail["result"]["problem_cases"][3]["problem_case"] == "latency_regression"


 def test_studio_importing_two_compatible_results_returns_compare_data():
diff --git a/tests/test_validation_demo_report.py b/tests/test_validation_demo_report.py
index 424c712..4591f2d 100644
--- a/tests/test_validation_demo_report.py
+++ b/tests/test_validation_demo_report.py
@@ -44,3 +44,24 @@ def test_validation_problem_case_reports_cover_review_and_blocked_paths():
     assert reports["invalid_detection_structure_report.json"]["deployment_signal"]["decision"] == "blocked"
     assert reports["contract_shape_mismatch_report.json"]["contract_validation"]["input_shape"]["status"] == "mismatch"
     assert reports["contract_shape_mismatch_report.json"]["deployment_signal"]["decision"] == "blocked"
+
+
+def test_latency_regression_problem_case_records_review_signal():
+    repo_root = Path(__file__).resolve().parents[1]
+    summary_path = repo_root / "examples" / "studio_demo" / "latency_regression_summary.json"
+    baseline_path = repo_root / "examples" / "studio_demo" / "normal_baseline_result.json"
+    regression_path = repo_root / "examples" / "studio_demo" / "latency_regression_result.json"
+
+    summary = json.loads(summary_path.read_text(encoding="utf-8"))
+    baseline = json.loads(baseline_path.read_text(encoding="utf-8"))
+    regression = json.loads(regression_path.read_text(encoding="utf-8"))
+
+    assert summary["problem_case"] == "latency_regression"
+    assert summary["deployment_signal"]["decision"] == "review_required"
+    assert summary["deployment_signal"]["reason"] == "p99 latency regression detected"
+    assert summary["latency_checks"]["mean_latency"]["delta_pct"] >= 10.0
+    assert summary["latency_checks"]["p99_latency"]["delta_pct"] >= 20.0
+    assert summary["latency_checks"]["run_config"]["status"] == "passed"
+    assert baseline["backend_key"] == regression["backend_key"] == "tensorrt__jetson"
+    assert baseline["compare_key"] == regression["compare_key"]
+    assert baseline["run_config"] == regression["run_config"]
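
The fixture numbers above follow a simple percentage-delta rule. Below is a minimal standalone sketch of that arithmetic, not the repository's implementation: it assumes delta_pct = (new - baseline) / baseline * 100 and treats any delta whose magnitude exceeds its policy threshold as a review trigger. The function names (pct_delta, latency_review) and the non-regression label "ok" are illustrative only, and the run_config equality check recorded in the summary is omitted here.

# Hypothetical sketch, not the InferEdge implementation: recompute the deltas
# recorded in latency_regression_summary.json from the two result fixtures.

def pct_delta(baseline: float, new: float) -> float:
    # Signed percentage change from baseline to new.
    return (new - baseline) / baseline * 100.0


def latency_review(baseline: dict, new: dict, policy: dict) -> dict:
    # Compare each metric's delta against its policy threshold by magnitude,
    # so an fps drop of about -13% trips the 10% fps_drop_pct threshold.
    checks = {
        "mean_latency": (pct_delta(baseline["mean_ms"], new["mean_ms"]),
                         policy["mean_latency_regression_pct"]),
        "p99_latency": (pct_delta(baseline["p99_ms"], new["p99_ms"]),
                        policy["p99_latency_regression_pct"]),
        "fps": (pct_delta(baseline["fps_value"], new["fps_value"]),
                policy["fps_drop_pct"]),
    }
    flagged = {name: round(delta, 3)
               for name, (delta, threshold) in checks.items()
               if abs(delta) > threshold}
    # "ok" is a placeholder label; only "review_required" appears in the fixtures.
    decision = "review_required" if flagged else "ok"
    return {"flagged": flagged, "decision": decision}


if __name__ == "__main__":
    policy = {"mean_latency_regression_pct": 10.0,
              "p99_latency_regression_pct": 20.0,
              "fps_drop_pct": 10.0}
    baseline = {"mean_ms": 14.0, "p99_ms": 15.5, "fps_value": 71.429}
    regressed = {"mean_ms": 16.1, "p99_ms": 22.0, "fps_value": 62.112}
    # Expected: mean +15.0%, p99 +41.935%, fps -13.044% -> review_required,
    # matching the latency_checks block in latency_regression_summary.json.
    print(latency_review(baseline, regressed, policy))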