8 changes: 6 additions & 2 deletions docs/portfolio/validation_problem_cases.md
@@ -9,16 +9,20 @@ InferEdge does not hide validation failures. These fixtures show how the Lab evi
| annotation missing | review | Accuracy is intentionally skipped when annotation evidence is unavailable. |
| invalid detection structure | blocked | Score/bbox structural checks can block malformed detection output. |
| contract shape mismatch | blocked | Runtime input shape must match the declared `model_contract.json`. |
| latency regression | review_required | A latency regression on the same backend/run_config can force deployment review even when the result is structurally valid. |

## Files

- `examples/validation_demo/problem_cases/annotation_missing_report.json`
- `examples/validation_demo/problem_cases/invalid_detection_structure_report.json`
- `examples/validation_demo/problem_cases/contract_shape_mismatch_report.json`
- `examples/studio_demo/normal_baseline_result.json`
- `examples/studio_demo/latency_regression_result.json`
- `examples/studio_demo/latency_regression_summary.json`

## Interpretation

These are deliberately small report fixtures, not production SaaS records.
They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, and contract mismatches are explicit evidence states.
They make the portfolio story clearer: InferEdge is a contract/preset validation pipeline, so missing annotations, malformed outputs, contract mismatches, and latency regressions are explicit evidence states.

Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths.
Local Studio includes these problem cases in the `Load Demo Evidence` flow so the browser demo can show both the happy path and the review/block paths. The latency regression case intentionally compares the same TensorRT Jetson FP16 backend and run configuration so the review signal is about performance regression, not a backend mismatch.
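
To make the new table row concrete, here is a minimal standalone sketch of the threshold check the fixtures below encode. The function names and policy-key names mirror `latency_regression_summary.json` but are illustrative; this is not the Lab's actual API.

```python
# Standalone sketch of the latency-regression policy check that the new
# fixtures encode. Names mirror latency_regression_summary.json but are
# illustrative -- not the Lab's actual implementation.

def delta_pct(baseline: float, new: float) -> float:
    """Percent change of `new` relative to `baseline`."""
    return (new - baseline) / baseline * 100.0


def check_latency_regression(baseline: dict, new: dict, policy: dict) -> str:
    # A latency comparison is only meaningful on the same run configuration.
    if baseline["run_config"] != new["run_config"]:
        return "invalid_comparison"

    mean_delta = delta_pct(baseline["mean_ms"], new["mean_ms"])
    p99_delta = delta_pct(baseline["p99_ms"], new["p99_ms"])
    fps_delta = delta_pct(baseline["fps_value"], new["fps_value"])

    regressed = (
        mean_delta > policy["mean_latency_regression_pct"]
        or p99_delta > policy["p99_latency_regression_pct"]
        or -fps_delta > policy["fps_drop_pct"]  # an fps drop is a negative delta
    )
    return "review_required" if regressed else "passed"
```

With the fixture numbers (mean 14.0 → 16.1 ms, p99 15.5 → 22.0 ms, fps 71.429 → 62.112), all three deltas exceed their thresholds, so the sketch returns `review_required`, matching the summary's `deployment_signal`.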
41 changes: 41 additions & 0 deletions examples/studio_demo/latency_regression_result.json
@@ -0,0 +1,41 @@
{
"runtime_role": "runtime-result",
"model": "yolov8n.onnx",
"engine": "tensorrt",
"engine_backend": "tensorrt",
"device": "jetson",
"device_name": "jetson",
"precision": "fp16",
"batch": 1,
"height": 640,
"width": 640,
"mean_ms": 16.1,
"p99_ms": 22.0,
"fps_value": 62.112,
"success": true,
"status": "success",
"timestamp": "2026-04-30T12:20:00Z",
"compare_key": "yolov8n__b1__h640w640__fp16",
"backend_key": "tensorrt__jetson",
"system": {
"os": "Linux 5.15.148-tegra",
"machine": "aarch64"
},
"run_config": {
"warmup": 1,
"runs": 5,
"mode": "image",
"task": "detection",
"precision": "fp16"
},
"accuracy": {},
"extra": {
"problem_case_role": "latency_regression_new",
"input_mode": "image",
"input_preprocess": "opencv_bgr_to_rgb_resize_float32_nchw",
"manifest_applied": true,
"effective_batch": 1,
"effective_height": 640,
"effective_width": 640
}
}
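
The `compare_key` and `backend_key` fields above follow an evident naming pattern; a hedged reconstruction of it, for readers tracing the fixtures (the Lab's real derivation may differ):

```python
# Hedged reconstruction of how the fixture's keys appear to be composed;
# the actual derivation inside the Lab codebase may differ.
def make_keys(result: dict) -> tuple[str, str]:
    model_stem = result["model"].rsplit(".", 1)[0]  # "yolov8n.onnx" -> "yolov8n"
    compare_key = (
        f"{model_stem}__b{result['batch']}"
        f"__h{result['height']}w{result['width']}__{result['precision']}"
    )
    backend_key = f"{result['engine_backend']}__{result['device']}"
    return compare_key, backend_key
# -> ("yolov8n__b1__h640w640__fp16", "tensorrt__jetson") for this fixture
```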
79 changes: 79 additions & 0 deletions examples/studio_demo/latency_regression_summary.json
@@ -0,0 +1,79 @@
{
"problem_case": "latency_regression",
"problem_case_type": "runtime_latency",
"source": "examples/studio_demo/latency_regression_summary.json",
"baseline_source": "examples/studio_demo/normal_baseline_result.json",
"new_source": "examples/studio_demo/latency_regression_result.json",
"policy": {
"mean_latency_regression_pct": 10.0,
"p99_latency_regression_pct": 20.0,
"fps_drop_pct": 10.0,
"run_config_mismatch": "invalid_comparison"
},
"baseline": {
"label": "normal baseline",
"backend_key": "tensorrt__jetson",
"compare_key": "yolov8n__b1__h640w640__fp16",
"mean_ms": 14.0,
"p99_ms": 15.5,
"fps_value": 71.429,
"run_config": {
"warmup": 1,
"runs": 5,
"mode": "image",
"task": "detection",
"precision": "fp16"
}
},
"new": {
"label": "regressed result",
"backend_key": "tensorrt__jetson",
"compare_key": "yolov8n__b1__h640w640__fp16",
"mean_ms": 16.1,
"p99_ms": 22.0,
"fps_value": 62.112,
"run_config": {
"warmup": 1,
"runs": 5,
"mode": "image",
"task": "detection",
"precision": "fp16"
}
},
"latency_checks": {
"mean_latency": {
"status": "review",
"baseline_ms": 14.0,
"new_ms": 16.1,
"delta_ms": 2.1,
"delta_pct": 15.0,
"threshold_pct": 10.0
},
"p99_latency": {
"status": "review",
"baseline_ms": 15.5,
"new_ms": 22.0,
"delta_ms": 6.5,
"delta_pct": 41.935,
"threshold_pct": 20.0
},
"fps": {
"status": "review",
"baseline": 71.429,
"new": 62.112,
"delta": -9.317,
"delta_pct": -13.044,
"threshold_pct": 10.0
},
"run_config": {
"status": "passed",
"mismatch": false
}
},
"deployment_signal": {
"decision": "review_required",
"reason": "p99 latency regression detected",
"notes": "Same backend, precision, and run_config comparison shows the new TensorRT Jetson result exceeds the p99 regression threshold.",
"recommended_action": "Review runtime provenance and rerun the same Jetson benchmark before deployment."
}
}
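
The `delta_pct` values in `latency_checks` can be re-derived from the two result fixtures; a quick standalone sanity check:

```python
# Sanity-check that the summary's delta_pct values follow from the
# baseline/new result fixtures (rounded to three decimals, as in the JSON).
baseline = {"mean_ms": 14.0, "p99_ms": 15.5, "fps_value": 71.429}
new = {"mean_ms": 16.1, "p99_ms": 22.0, "fps_value": 62.112}

for key, base in baseline.items():
    print(key, round((new[key] - base) / base * 100.0, 3))
# mean_ms 15.0, p99_ms 41.935, fps_value -13.044 -- matching latency_checks
```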
41 changes: 41 additions & 0 deletions examples/studio_demo/normal_baseline_result.json
@@ -0,0 +1,41 @@
{
"runtime_role": "runtime-result",
"model": "yolov8n.onnx",
"engine": "tensorrt",
"engine_backend": "tensorrt",
"device": "jetson",
"device_name": "jetson",
"precision": "fp16",
"batch": 1,
"height": 640,
"width": 640,
"mean_ms": 14.0,
"p99_ms": 15.5,
"fps_value": 71.429,
"success": true,
"status": "success",
"timestamp": "2026-04-30T12:10:00Z",
"compare_key": "yolov8n__b1__h640w640__fp16",
"backend_key": "tensorrt__jetson",
"system": {
"os": "Linux 5.15.148-tegra",
"machine": "aarch64"
},
"run_config": {
"warmup": 1,
"runs": 5,
"mode": "image",
"task": "detection",
"precision": "fp16"
},
"accuracy": {},
"extra": {
"problem_case_role": "latency_regression_baseline",
"input_mode": "image",
"input_preprocess": "opencv_bgr_to_rgb_resize_float32_nchw",
"manifest_applied": true,
"effective_batch": 1,
"effective_height": 640,
"effective_width": 640
}
}
34 changes: 33 additions & 1 deletion inferedgelab/studio/routes.py
@@ -33,6 +33,7 @@
"invalid_detection_structure_report.json",
"contract_shape_mismatch_report.json",
)
LATENCY_REGRESSION_SUMMARY = "latency_regression_summary.json"
DEMO_JOB_ID = "demo_yolov8n_trt_vs_onnx"
STATIC_ASSETS = {
"app.js": "application/javascript",
@@ -336,7 +337,38 @@ def _load_demo_evaluation_report() -> dict[str, Any]:


def _load_demo_problem_cases() -> list[dict[str, Any]]:
return [_load_problem_report(file_name) for file_name in DEMO_PROBLEM_REPORTS]
cases = [_load_problem_report(file_name) for file_name in DEMO_PROBLEM_REPORTS]
cases.append(_load_latency_regression_summary())
return cases


def _load_latency_regression_summary() -> dict[str, Any]:
path = DEMO_EVIDENCE_DIR / LATENCY_REGRESSION_SUMMARY
try:
summary = json.loads(path.read_text(encoding="utf-8"))
except OSError as exc:
raise HTTPException(status_code=500, detail=f"latency regression summary not found: {LATENCY_REGRESSION_SUMMARY}") from exc
except json.JSONDecodeError as exc:
raise HTTPException(status_code=500, detail=f"latency regression summary is invalid JSON: {LATENCY_REGRESSION_SUMMARY}") from exc

problem_case = summary.get("problem_case") if isinstance(summary, dict) else None
deployment_signal = summary.get("deployment_signal") if isinstance(summary, dict) else None
latency_checks = summary.get("latency_checks") if isinstance(summary, dict) else None
if not isinstance(problem_case, str) or not isinstance(deployment_signal, dict) or not isinstance(latency_checks, dict):
raise HTTPException(status_code=500, detail=f"latency regression summary schema error: {LATENCY_REGRESSION_SUMMARY}")

return {
"problem_case": problem_case,
"problem_case_type": summary.get("problem_case_type") or "runtime_latency",
"source": f"examples/studio_demo/{LATENCY_REGRESSION_SUMMARY}",
"baseline_source": summary.get("baseline_source"),
"new_source": summary.get("new_source"),
"policy": summary.get("policy") or {},
"baseline": summary.get("baseline") or {},
"new": summary.get("new") or {},
"latency_checks": latency_checks,
"deployment_signal": deployment_signal,
}


def _load_problem_report(file_name: str) -> dict[str, Any]:
30 changes: 22 additions & 8 deletions inferedgelab/studio/static/app.js
@@ -404,24 +404,31 @@ function renderDemoProblemCases(problemCases = []) {

problemCases.forEach((problem) => {
const signal = problem.deployment_signal || {};
const structural = problem.structural_validation || {};
const contractShape = problem.contract_validation?.input_shape || {};
const accuracy = problem.accuracy || {};
const card = createElement("article", `problem-case ${decisionTone(signal.decision)}`);
card.append(
createElement("p", "caption", problem.problem_case || "problem case"),
createElement("h4", "", String(signal.decision || "review").toUpperCase()),
createElement("p", "body-text", signal.reason || "Validation evidence requires review."),
createElement(
"p",
"caption",
`accuracy=${accuracy.status || "-"} / structure=${structural.status || "-"} / contract=${contractShape.status || "-"}`,
),
createElement("p", "caption", problemCaseDetail(problem)),
);
target.append(card);
});
}

function problemCaseDetail(problem = {}) {
if (problem.problem_case_type === "runtime_latency" || problem.latency_checks) {
const checks = problem.latency_checks || {};
const mean = checks.mean_latency?.delta_pct;
const p99 = checks.p99_latency?.delta_pct;
const fps = checks.fps?.delta_pct;
return `mean=${formatPercent(mean)} / p99=${formatPercent(p99)} / fps=${formatPercent(fps)} / run_config=${checks.run_config?.status || "-"}`;
}
const structural = problem.structural_validation || {};
const contractShape = problem.contract_validation?.input_shape || {};
const accuracy = problem.accuracy || {};
return `accuracy=${accuracy.status || "-"} / structure=${structural.status || "-"} / contract=${contractShape.status || "-"}`;
}

function renderDemoEvaluation(report) {
const target = document.querySelector("#demo-report-summary");
if (!target) {
@@ -1037,6 +1044,13 @@
return number.toFixed(3).replace(/\.?0+$/, "");
}

function formatPercent(value) {
if (value === undefined || value === null) {
return "-";
}
return `${formatNumber(value)}%`;
}

function formatValue(value) {
return displayValue(value);
}
13 changes: 11 additions & 2 deletions tests/test_studio_routes.py
@@ -356,13 +356,21 @@ def test_studio_demo_evidence_loads_compare_ready_pair():
assert response["evaluation_report"]["accuracy"]["status"] == "evaluated"
assert response["evaluation_report"]["accuracy"]["metrics"]["map50"] > 0
assert response["evaluation_report"]["structural_validation"]["status"] == "passed"
assert len(response["problem_cases"]) == 3
assert len(response["problem_cases"]) == 4
assert {case["problem_case"] for case in response["problem_cases"]} == {
"annotation_missing",
"invalid_detection_structure",
"contract_shape_mismatch",
"latency_regression",
}
assert {case["deployment_signal"]["decision"] for case in response["problem_cases"]} == {"review", "blocked"}
assert {case["deployment_signal"]["decision"] for case in response["problem_cases"]} == {
"review",
"blocked",
"review_required",
}
latency_case = next(case for case in response["problem_cases"] if case["problem_case"] == "latency_regression")
assert latency_case["latency_checks"]["p99_latency"]["delta_pct"] > 20
assert latency_case["deployment_signal"]["reason"] == "p99 latency regression detected"
assert compare["status"] == "ok"
assert compare["base"]["backend_key"] == "onnxruntime__cpu"
assert compare["new"]["backend_key"] == "tensorrt__jetson"
@@ -389,6 +397,7 @@ def test_studio_demo_evidence_is_listed_and_selectable_as_job():
assert detail["result"]["comparison"]["new"]["backend_key"] == "tensorrt__jetson"
assert detail["result"]["evaluation_report"]["accuracy"]["metrics"]["precision"] > 0
assert detail["result"]["problem_cases"][1]["structural_validation"]["status"] == "failed"
assert detail["result"]["problem_cases"][3]["problem_case"] == "latency_regression"


def test_studio_importing_two_compatible_results_returns_compare_data():
21 changes: 21 additions & 0 deletions tests/test_validation_demo_report.py
@@ -44,3 +44,24 @@ def test_validation_problem_case_reports_cover_review_and_blocked_paths():
assert reports["invalid_detection_structure_report.json"]["deployment_signal"]["decision"] == "blocked"
assert reports["contract_shape_mismatch_report.json"]["contract_validation"]["input_shape"]["status"] == "mismatch"
assert reports["contract_shape_mismatch_report.json"]["deployment_signal"]["decision"] == "blocked"


def test_latency_regression_problem_case_records_review_signal():
repo_root = Path(__file__).resolve().parents[1]
summary_path = repo_root / "examples" / "studio_demo" / "latency_regression_summary.json"
baseline_path = repo_root / "examples" / "studio_demo" / "normal_baseline_result.json"
regression_path = repo_root / "examples" / "studio_demo" / "latency_regression_result.json"

summary = json.loads(summary_path.read_text(encoding="utf-8"))
baseline = json.loads(baseline_path.read_text(encoding="utf-8"))
regression = json.loads(regression_path.read_text(encoding="utf-8"))

assert summary["problem_case"] == "latency_regression"
assert summary["deployment_signal"]["decision"] == "review_required"
assert summary["deployment_signal"]["reason"] == "p99 latency regression detected"
assert summary["latency_checks"]["mean_latency"]["delta_pct"] >= 10.0
assert summary["latency_checks"]["p99_latency"]["delta_pct"] >= 20.0
assert summary["latency_checks"]["run_config"]["status"] == "passed"
assert baseline["backend_key"] == regression["backend_key"] == "tensorrt__jetson"
assert baseline["compare_key"] == regression["compare_key"]
assert baseline["run_config"] == regression["run_config"]