diff --git a/inferedgelab/studio/routes.py b/inferedgelab/studio/routes.py
index b5a08b6..026025d 100644
--- a/inferedgelab/studio/routes.py
+++ b/inferedgelab/studio/routes.py
@@ -102,7 +102,11 @@ def studio_job_detail(request: Request, job_id: str) -> dict[str, Any]:
def studio_compare_latest(request: Request) -> dict[str, Any]:
imported_results = _get_imported_results(request)
if len(imported_results) >= 2:
- return _build_imported_compare_response(imported_results[-2], imported_results[-1])
+ return _build_imported_compare_response(
+ imported_results[-2],
+ imported_results[-1],
+ guard_analysis=_get_studio_guard_analysis(request),
+ )
endpoint = _get_api_endpoint(request.app, "/api/compare-latest")
try:
@@ -153,6 +157,7 @@ def studio_import(request: Request, payload: dict[str, Any] = Body(...)) -> dict
result = _apply_backend_override(result, payload.get("backend_override"))
imported_results = _get_imported_results(request)
imported_results.append(result)
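+    # A fresh import invalidates any previously loaded guard analysis evidence.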
+ request.app.state.studio_guard_analysis = None
return {
"status": "imported",
"source": "studio-memory",
@@ -169,7 +174,13 @@ def studio_demo_evidence(request: Request) -> dict[str, Any]:
problem_cases = _load_demo_problem_cases()
imported_results = _get_imported_results(request)
imported_results.extend(results)
- compare = _build_imported_compare_response(results[0], results[1])
+ guard_analysis = _build_demo_guard_analysis(results, evaluation_report)
+ request.app.state.studio_guard_analysis = guard_analysis
+ compare = _build_imported_compare_response(
+ results[0],
+ results[1],
+ guard_analysis=guard_analysis,
+ )
demo_job = _build_demo_job(results, compare, evaluation_report, problem_cases)
_get_demo_jobs(request)[DEMO_JOB_ID] = demo_job
return {
@@ -183,6 +194,7 @@ def studio_demo_evidence(request: Request) -> dict[str, Any]:
"compare": compare,
"evaluation_report": evaluation_report,
"problem_cases": problem_cases,
+ "guard_analysis": guard_analysis,
"deployment_decision": compare["deployment_decision"],
}
@@ -217,6 +229,7 @@ def register_studio(app: FastAPI, job_store: Any | None = None) -> None:
app.state.studio_job_store = job_store
app.state.studio_imported_results = []
app.state.studio_demo_jobs = {}
+ app.state.studio_guard_analysis = None
app.include_router(router)
@@ -240,6 +253,11 @@ def _get_demo_jobs(request: Request) -> dict[str, dict[str, Any]]:
return demo_jobs
+def _get_studio_guard_analysis(request: Request) -> dict[str, Any] | None:
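+    # Guard evidence is optional; anything other than a dict is treated as absent.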
+ guard_analysis = getattr(request.app.state, "studio_guard_analysis", None)
+ return guard_analysis if isinstance(guard_analysis, dict) else None
+
+
def _get_api_endpoint(app: FastAPI, path: str) -> Any:
for route in app.routes:
if getattr(route, "path", None) == path:
@@ -269,25 +287,33 @@ def _load_import_payload(payload: dict[str, Any]) -> dict[str, Any]:
return _with_compare_keys(result)
-def _build_imported_compare_response(base: dict[str, Any], new: dict[str, Any]) -> dict[str, Any]:
+def _build_imported_compare_response(
+ base: dict[str, Any],
+ new: dict[str, Any],
+ guard_analysis: dict[str, Any] | None = None,
+) -> dict[str, Any]:
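+    # The optional guard evidence feeds the deployment decision and, when present,
+    # is echoed both inside the nested "data" payload and at the top level.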
result = compare_results(base, new)
judgement = judge_comparison(result)
- deployment_decision = build_deployment_decision(judgement)
+ deployment_decision = build_deployment_decision(judgement, guard_analysis=guard_analysis)
+ data = {
+ "base": base,
+ "new": new,
+ "result": result,
+ "judgement": judgement,
+ "deployment_decision": deployment_decision,
+ }
+ if guard_analysis is not None:
+ data["guard_analysis"] = guard_analysis
return {
"status": "ok",
"source": "studio-memory",
- "data": {
- "base": base,
- "new": new,
- "result": result,
- "judgement": judgement,
- "deployment_decision": deployment_decision,
- },
+ "data": data,
"base": base,
"new": new,
"result": result,
"judgement": judgement,
"deployment_decision": deployment_decision,
+ **({"guard_analysis": guard_analysis} if guard_analysis is not None else {}),
}
@@ -421,6 +447,7 @@ def _build_demo_job(
"runtime_result": runtime_result,
"comparison": compare,
"deployment_decision": compare["deployment_decision"],
+ "guard_analysis": compare.get("guard_analysis"),
"evaluation_report": evaluation_report,
"problem_cases": problem_cases,
"summary": compare["judgement"]["summary"],
@@ -434,6 +461,107 @@ def _build_demo_job(
}
+def _build_demo_guard_analysis(
+ results: list[dict[str, Any]],
+ evaluation_report: dict[str, Any],
+) -> dict[str, Any]:
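+    # Deterministic demo diagnosis: the verdict drops to "review_required"
+    # whenever the reported map50 falls below the 0.2 review threshold.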
+ baseline = results[0] if results else {}
+ candidate = results[-1] if results else {}
+ accuracy_metrics = evaluation_report.get("accuracy", {}).get("metrics", {})
+ structural = evaluation_report.get("structural_validation") or {}
+ contract = evaluation_report.get("contract_validation", {}).get("input_shape") or {}
+ map50 = accuracy_metrics.get("map50")
+ precision = accuracy_metrics.get("precision")
+ recall = accuracy_metrics.get("recall")
+ verdict = "review_required" if isinstance(map50, (int, float)) and map50 < 0.2 else "pass"
+ severity = "medium" if verdict == "review_required" else "low"
+ source = {
+ "runtime_result_path": candidate.get("_source_path") or "examples/studio_demo/tensorrt_jetson_result.json",
+ "baseline_profile_path": baseline.get("_source_path") or "examples/studio_demo/onnxruntime_cpu_result.json",
+ "evaluation_report_path": evaluation_report.get("source"),
+ "model_contract_path": "examples/validation_demo/subset/model_contract.json",
+ "lab_result_path": "studio.demo_evidence",
+ }
+ evidence = [
+ {
+ "type": "accuracy_signal",
+ "metric_name": "map50",
+ "observed_value": map50,
+ "baseline_value": None,
+ "threshold": 0.2,
+ "severity": severity,
+ "status": "warning" if verdict == "review_required" else "passed",
+ "explanation": (
+ "Demo mAP50 is below the review threshold, so Lab should keep this as validation evidence "
+ "instead of treating latency speedup alone as deployment-ready."
+ ),
+ "why_it_matters": "Latency improvement does not prove detection quality is deployment-ready.",
+ "suspected_causes": [
+ "Small validation subset",
+ "Model/preset calibration gap",
+ "Postprocess or threshold tuning needed",
+ ],
+ "recommendation": "Review accuracy evidence with a larger validation subset before deployment.",
+ "raw_context": {
+ "precision": precision,
+ "recall": recall,
+ "structural_status": structural.get("status"),
+ "contract_status": contract.get("status"),
+ },
+ },
+ {
+ "type": "contract_validation",
+ "metric_name": "input_shape_status",
+ "observed_value": contract.get("status"),
+ "baseline_value": "passed",
+ "threshold": "passed",
+ "severity": "low" if contract.get("status") == "passed" else "high",
+ "status": "passed" if contract.get("status") == "passed" else "failed",
+ "explanation": "The demo model contract input shape check is recorded as structured evidence.",
+ "why_it_matters": "Contract mismatch can make accuracy metrics unreliable.",
+ "suspected_causes": [],
+ "recommendation": "Keep model_contract evidence attached to the Lab report.",
+ "raw_context": contract,
+ },
+ ]
+ return {
+ "schema_version": "inferedge-aiguard-diagnosis-v1",
+ "source": source,
+ "guard_verdict": verdict,
+ "severity": severity,
+ "confidence": 0.82,
+ "primary_reason": (
+ "Latency improved, but demo accuracy evidence still requires review."
+ if verdict == "review_required"
+ else "Demo validation evidence is within configured Guard thresholds."
+ ),
+ "evidence": evidence,
+ "suspected_causes": [
+ "Small validation subset",
+ "Detection threshold tuning needed",
+ ]
+ if verdict == "review_required"
+ else [],
+ "recommendations": [
+ "Use this demo as portfolio evidence, then validate with a larger representative dataset before deployment.",
+ "Keep AIGuard evidence optional and let Lab own the final deployment decision.",
+ ],
+ "thresholds": {"map50_review": 0.2},
+ "baseline_summary": {
+ "backend_key": baseline.get("backend_key"),
+ "mean_ms": baseline.get("mean_ms"),
+ "p99_ms": baseline.get("p99_ms"),
+ },
+ "candidate_summary": {
+ "backend_key": candidate.get("backend_key"),
+ "mean_ms": candidate.get("mean_ms"),
+ "p99_ms": candidate.get("p99_ms"),
+ "map50": map50,
+ },
+ "created_at": _utc_now_iso(),
+ }
+
+
def _build_analyze_display_name(job: dict[str, Any]) -> str:
input_summary = job.get("input_summary") or {}
model_path = _first_display_value(input_summary.get("model_path"), input_summary.get("artifact_path"))
diff --git a/inferedgelab/studio/static/app.js b/inferedgelab/studio/static/app.js
index 5d17ff7..0987777 100644
--- a/inferedgelab/studio/static/app.js
+++ b/inferedgelab/studio/static/app.js
@@ -30,6 +30,7 @@ let activeDecision = null;
let importedResult = null;
let demoEvaluationReport = null;
let demoProblemCases = [];
+let activeGuardAnalysis = null;
const importedResultsByJobId = {};
function createElement(tagName, className, textContent) {
@@ -197,6 +198,7 @@ async function loadJobDetail(jobId) {
renderJobDetail();
renderJobList();
updateDecision(extractDecision(selectedJob));
+ updateGuardEvidence(extractGuardAnalysis(selectedJob));
} catch (error) {
selectedJob = null;
renderJobDetail(`Unable to load ${jobId}: ${formatError(error)}`);
@@ -210,6 +212,7 @@ async function loadCompare() {
compareData = await fetchJson("/studio/api/compare/latest");
renderCompare();
const decision = extractDecision(compareData);
+ updateGuardEvidence(extractGuardAnalysis(compareData));
if (compareData.status === "empty") {
activeDecision = null;
renderDecision(decision);
@@ -219,6 +222,7 @@ async function loadCompare() {
} catch (error) {
compareData = { status: "error", error: formatError(error) };
renderCompare();
+ updateGuardEvidence(null);
if (!activeDecision) {
updateDecision(null);
}
@@ -370,6 +374,7 @@ async function loadDemoEvidence() {
demoEvaluationReport = payload.evaluation_report || null;
demoProblemCases = Array.isArray(payload.problem_cases) ? payload.problem_cases : [];
compareData = payload.compare || null;
+ updateGuardEvidence(payload.guard_analysis || payload.compare?.guard_analysis || null);
selectedJobId = payload.job_id || payload.job?.job_id || selectedJobId;
selectedJob = payload.job || selectedJob;
setState("#demo-state", "completed");
@@ -731,6 +736,123 @@ function updateDecision(decision) {
renderDecision(decision);
}
+function renderGuardEvidence(guardAnalysis) {
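+  // Render the optional AIGuard panel; a missing payload falls back to the idle state.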
+ const target = document.querySelector("#guard-evidence-panel");
+ if (!target) {
+ return;
+ }
+ target.replaceChildren();
+
+ if (!guardAnalysis) {
+ target.className = "guard-panel idle";
+ target.append(
+ createElement("p", "caption", "AIGuard"),
+ createElement("h3", "", "OPTIONAL"),
+ createElement("p", "body-text", "No AIGuard diagnosis evidence is loaded for this local workflow yet."),
+ createElement("p", "caption", "Load Demo Evidence or run compare with guard-backed diagnosis evidence."),
+ );
+ return;
+ }
+
+ const verdict = guardVerdict(guardAnalysis);
+ target.className = `guard-panel ${decisionTone(verdict)}`;
+ target.append(
+ createElement("p", "caption", "AIGuard diagnosis evidence"),
+ createElement("h3", "", verdict.toUpperCase()),
+ createElement("p", "body-text", guardAnalysis.primary_reason || guardAnalysis.reason || "Guard evidence is available."),
+ guardSummary(guardAnalysis, verdict),
+ );
+
+ const source = guardAnalysis.source || {};
+ if (Object.keys(source).length > 0) {
+ const sourcePanel = createElement("div", "guard-source");
+ sourcePanel.append(createElement("strong", "", "Source"));
+ Object.entries(source).forEach(([key, value]) => {
+ if (value !== undefined && value !== null && value !== "") {
+ sourcePanel.append(evidenceItem(key, value));
+ }
+ });
+ target.append(sourcePanel);
+ }
+
+ const evidence = guardEvidenceItems(guardAnalysis);
+ if (evidence.length > 0) {
+ const table = createElement("div", "guard-evidence-table");
+ table.append(guardEvidenceRow(["type", "metric", "observed", "threshold", "status"], true));
+ evidence.forEach((item) => {
+ table.append(
+ guardEvidenceRow([
+ item.type || "-",
+ item.metric_name || "-",
+ item.observed_value,
+ item.threshold,
+ item.status || item.severity || "-",
+ ]),
+ );
+ });
+ target.append(table);
+ target.append(guardExplanations(evidence));
+ }
+
+ target.append(guardList("Suspected causes", guardAnalysis.suspected_causes));
+ target.append(guardList("Recommendations", guardAnalysis.recommendations));
+}
+
+function updateGuardEvidence(guardAnalysis) {
+ activeGuardAnalysis = guardAnalysis || null;
+ renderGuardEvidence(activeGuardAnalysis);
+}
+
+function guardSummary(guardAnalysis, verdict) {
+ const summary = createElement("div", "guard-summary");
+ summary.append(
+ evidenceItem("guard_verdict", verdict),
+ evidenceItem("severity", guardAnalysis.severity || "-"),
+ evidenceItem("confidence", guardAnalysis.confidence ?? "-"),
+ evidenceItem("schema", guardAnalysis.schema_version || "legacy"),
+ );
+ return summary;
+}
+
+function guardEvidenceRow(values, heading = false) {
+ const row = createElement("div", heading ? "guard-row guard-row-heading" : "guard-row");
+ values.forEach((value) => row.append(createElement("span", "", formatValue(value))));
+ return row;
+}
+
+function guardExplanations(evidence) {
+ const explanations = createElement("div", "guard-explanations");
+ evidence.forEach((item) => {
+ if (!item.explanation && !item.recommendation) {
+ return;
+ }
+ const explanation = createElement("article", "detail-note");
+ explanation.append(
+ createElement("strong", "", item.metric_name || item.type || "evidence"),
+ createElement("p", "body-text", item.explanation || "-"),
+ );
+ if (item.recommendation) {
+ explanation.append(createElement("p", "caption", `Recommendation: ${item.recommendation}`));
+ }
+ explanations.append(explanation);
+ });
+ return explanations;
+}
+
+function guardList(title, values) {
+ const panel = createElement("div", "guard-list");
+ panel.append(createElement("strong", "", title));
+ const list = createElement("ul", "");
+ const items = Array.isArray(values) ? values : values ? [values] : [];
+ if (!items.length) {
+ list.append(createElement("li", "", "-"));
+ } else {
+ items.forEach((value) => list.append(createElement("li", "", formatValue(value))));
+ }
+ panel.append(list);
+ return panel;
+}
+
function metricTile(label, value) {
const tile = createElement("div", "metric-tile");
tile.append(createElement("span", "metric-name", label), createElement("span", "metric-value", value));
@@ -835,6 +957,50 @@ function extractDecision(payload) {
return payload.deployment_decision || payload.result?.deployment_decision || payload.data?.deployment_decision || null;
}
+function extractGuardAnalysis(payload) {
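+  // Guard evidence may sit at the top level, under result/data, or inside a nested comparison payload.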
+ if (!payload) {
+ return null;
+ }
+ return (
+ payload.guard_analysis ||
+ payload.result?.guard_analysis ||
+ payload.data?.guard_analysis ||
+ payload.result?.comparison?.guard_analysis ||
+ payload.result?.comparison?.data?.guard_analysis ||
+ null
+ );
+}
+
+function guardVerdict(guardAnalysis = {}) {
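+  // Prefer the structured guard_verdict field, then map legacy status values onto verdicts.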
+ if (guardAnalysis.guard_verdict) {
+ return String(guardAnalysis.guard_verdict);
+ }
+ const status = String(guardAnalysis.status || "").toLowerCase();
+ if (status === "ok") {
+ return "pass";
+ }
+ if (status === "warning") {
+ return "review_required";
+ }
+ if (status === "error") {
+ return "blocked";
+ }
+ if (status === "skipped") {
+ return "skipped";
+ }
+ return "unknown";
+}
+
+function guardEvidenceItems(guardAnalysis = {}) {
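+  // Accept the structured "evidence" list or the legacy "anomalies" list.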
+ if (Array.isArray(guardAnalysis.evidence)) {
+ return guardAnalysis.evidence;
+ }
+ if (Array.isArray(guardAnalysis.anomalies)) {
+ return guardAnalysis.anomalies;
+ }
+ return [];
+}
+
function decisionReason(decision) {
const decisionName = String(decision?.decision || "unknown").toLowerCase();
if (decisionName === "unknown" && !decision?.guard_status) {
@@ -883,7 +1049,7 @@ function pipelineStatus() {
const anyCompleted = currentJobs.some((job) => job.status === "completed") || Boolean(importedResult);
const hasCompareDecision = Boolean(activeDecision);
const hasImportedEvidence = Boolean(importedResult);
- const hasGuardEvidence = Boolean(activeDecision?.guard_status);
+ const hasGuardEvidence = Boolean(activeGuardAnalysis || activeDecision?.guard_status || activeDecision?.guard_verdict);
return {
forge: importedResult ? "completed" : "idle",
runtime: hasImportedEvidence || anyCompleted ? "completed" : anyRunning ? "running" : "idle",
@@ -897,7 +1063,7 @@ function normalizeState(state) {
if (value === "queued") {
return "running";
}
- if (value === "completed" || value === "success" || value === "deployable") {
+ if (value === "completed" || value === "success" || value === "deployable" || value === "pass") {
return "completed";
}
if (value === "failed" || value === "blocked" || value === "error") {
@@ -1064,6 +1230,7 @@ async function initLocalStudio() {
renderJobDetail();
renderCompare();
updateDecision(null);
+ updateGuardEvidence(null);
await loadJobs();
await loadCompare();
await loadJetsonCommand();
diff --git a/inferedgelab/studio/static/index.html b/inferedgelab/studio/static/index.html
index 818aeda..bd07216 100644
--- a/inferedgelab/studio/static/index.html
+++ b/inferedgelab/studio/static/index.html
@@ -137,8 +137,8 @@
[Hunk content lost in extraction: two lines are replaced here, apparently inside the page's style rules given the surviving closing braces.]
@@ -311,9 +311,20 @@ Compare View
[Hunk content lost in extraction: the surviving text shows an "AIGuard Evidence" panel added as step 05, captioned "Optional deterministic diagnosis evidence explains why a result should pass, require review, or be blocked.", presumably including the #guard-evidence-panel container that app.js populates, followed by the "Deployment Decision" panel as step 06 ("Lab's local gate for deploy, review, or block. AIGuard evidence is optional and not required for this Studio flow.").]
@@ -333,7 +344,7 @@
[Hunk content lost in extraction: two entries under "Future Work" are replaced.]