4 changes: 4 additions & 0 deletions README.md
@@ -104,9 +104,13 @@ The same evidence can be exported from the CLI without opening the browser:
```bash
poetry run inferedgelab demo-evidence-summary
poetry run inferedgelab demo-evidence-summary --format json
poetry run inferedgelab portfolio-demo-check
poetry run inferedgelab export-demo-evidence --output reports/studio_demo_evidence.md
```

`portfolio-demo-check` is the pre-submission guardrail for this portfolio demo.
It validates the committed Studio fixtures, the metric values quoted in the README/PPT, the portfolio docs, and the local Studio assets without starting workers, queues, databases, or a production SaaS service.
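
For readers who prefer to wire this gate into their own scripts, here is a minimal sketch of the same check run programmatically instead of through the CLI. It uses only `build_portfolio_demo_check` and `build_portfolio_demo_check_text` from `inferedgelab/services/demo_evidence_report.py` in this PR; running it from the repository root with the package importable is an assumption.

```python
# Minimal sketch: run the portfolio demo check programmatically, mirroring what
# `poetry run inferedgelab portfolio-demo-check` does in the shell.
# Assumes the inferedgelab package from this PR is importable and the script
# runs from the repository root.
import sys

from inferedgelab.services.demo_evidence_report import (
    build_portfolio_demo_check,
    build_portfolio_demo_check_text,
)

report = build_portfolio_demo_check(repo_root=".")
print(build_portfolio_demo_check_text(report), end="")

# Non-zero exit keeps this usable as a pre-submission or CI gate.
sys.exit(0 if report["status"] == "pass" else 1)
```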

![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)

Verified demo fixture values:
4 changes: 4 additions & 0 deletions inferedgelab/cli.py
@@ -14,6 +14,7 @@
from inferedgelab.commands.enrich_result import enrich_result_cmd
from inferedgelab.commands.demo_evidence import demo_evidence_summary_cmd
from inferedgelab.commands.demo_evidence import export_demo_evidence_cmd
from inferedgelab.commands.demo_evidence import portfolio_demo_check_cmd
from inferedgelab.commands.list_results import list_results_cmd
from inferedgelab.commands.history_report import history_report_cmd
from inferedgelab.commands.serve import serve_cmd
@@ -44,6 +45,9 @@ def version_cmd() -> None:
app.command("enrich-result", help="Attach accuracy metadata to an existing structured benchmark result")(enrich_result_cmd)
app.command("demo-evidence-summary", help="Print Local Studio demo evidence summary")(demo_evidence_summary_cmd)
app.command("export-demo-evidence", help="Export Local Studio demo evidence as Markdown")(export_demo_evidence_cmd)
app.command("portfolio-demo-check", help="Validate committed portfolio demo evidence before submission")(
portfolio_demo_check_cmd
)
app.command("list-results", help="List recent structured benchmark results")(list_results_cmd)
app.command("history-report", help="Generate HTML history report from structured benchmark results")(history_report_cmd)
app.command("serve", help="Run InferEdgeLab FastAPI server")(serve_cmd)
19 changes: 19 additions & 0 deletions inferedgelab/commands/demo_evidence.py
@@ -9,6 +9,8 @@
build_demo_evidence_markdown,
build_demo_evidence_summary,
build_demo_evidence_summary_text,
build_portfolio_demo_check,
build_portfolio_demo_check_text,
demo_evidence_summary_json,
write_demo_evidence_markdown,
)
@@ -48,3 +50,20 @@ def export_demo_evidence_cmd(
) -> None:
path = write_demo_evidence_markdown(output)
rprint(f"[green]Saved[/green]: {path}")


def portfolio_demo_check_cmd(
format: str = typer.Option("text", "--format", "-f", help="text/json"),
repo_root: str = typer.Option(".", "--repo-root", help="Repository root to check"),
) -> None:
report = build_portfolio_demo_check(repo_root=repo_root)
normalized_format = format.strip().lower()
if normalized_format == "text":
print(build_portfolio_demo_check_text(report), end="")
elif normalized_format == "json":
print(demo_evidence_summary_json(report), end="")
else:
raise typer.BadParameter("--format must be one of: text, json")

if report["status"] != "pass":
raise typer.Exit(code=1)
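
As a quick smoke of the wiring above, a minimal sketch that drives the registered command through Typer's test runner. It assumes `app` in `inferedgelab/cli.py` is a `typer.Typer()` instance (as the `app.command(...)` registrations imply) and that the process runs from the repository root so the default `--repo-root .` resolves correctly.

```python
# Minimal sketch: exercise the new command end-to-end via Typer's CliRunner.
# Assumes `app` in inferedgelab/cli.py is a typer.Typer() instance and the
# working directory is the repository root.
import json

from typer.testing import CliRunner

from inferedgelab.cli import app

runner = CliRunner()
result = runner.invoke(app, ["portfolio-demo-check", "--format", "json"])

# The JSON report is printed before the command exits, so it is available
# even when the check fails and the exit code is 1.
report = json.loads(result.output)
assert result.exit_code == (0 if report["status"] == "pass" else 1)
print(report["status"], report["failed_count"], "failed of", report["check_count"])
```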
270 changes: 270 additions & 0 deletions inferedgelab/services/demo_evidence_report.py
@@ -20,6 +20,44 @@

IN_MEMORY_NOTE = "Local Studio demo evidence is in-memory and resets when the server restarts."
DEMO_ANNOTATIONS_FILE = "yolov8_coco_subset_annotations.json"
PORTFOLIO_CHECK_SCHEMA_VERSION = "inferedgelab-portfolio-demo-check-v1"

EXPECTED_PORTFOLIO_METRICS = {
"onnxruntime_cpu.mean_ms": 45.4299,
"onnxruntime_cpu.p99_ms": 49.2128,
"onnxruntime_cpu.fps": 22.0119,
"tensorrt_jetson_fp16_25w.mean_ms": 10.066401,
"tensorrt_jetson_fp16_25w.p95_ms": 15.476641,
"tensorrt_jetson_fp16_25w.p99_ms": 15.548438,
"tensorrt_jetson_fp16_25w.fps": 99.340373,
"tensorrt_jetson_fp16_15w.mean_ms": 10.799106,
"tensorrt_jetson_fp16_15w.p99_ms": 15.529218,
"tensorrt_jetson_fp16_15w.fps": 92.600262,
"comparison.speedup": 4.513023075476529,
"evaluation_report.sample_count": 10,
"evaluation_report.ground_truth_boxes": 89,
"evaluation_report.map50": 0.1409784036,
"evaluation_report.precision": 0.2941176471,
"evaluation_report.recall": 0.1685393258,
}

REQUIRED_PORTFOLIO_FILES = [
"README.md",
"assets/images/local-studio-demo-evidence.png",
"docs/portfolio/final_validation_completion.md",
"docs/portfolio/runtime_compare_yolov8n.md",
"docs/portfolio/yolov8_coco_subset_evaluation.md",
"docs/portfolio/validation_problem_cases.md",
"examples/studio_demo/onnxruntime_cpu_result.json",
"examples/studio_demo/tensorrt_jetson_25w_result.json",
"examples/studio_demo/tensorrt_jetson_15w_result.json",
"examples/studio_demo/aiguard_portfolio_cases.json",
"examples/studio_demo/jetson_power_mode_summary.json",
"examples/validation_demo/subset/yolov8_coco_subset_evaluation.json",
"inferedgelab/studio/static/index.html",
"inferedgelab/studio/static/app.js",
"inferedgelab/studio/static/style.css",
]


def build_demo_evidence_summary() -> dict[str, Any]:
@@ -214,6 +252,164 @@ def write_demo_evidence_markdown(output: str | Path) -> Path:
return path


def build_portfolio_demo_check(repo_root: str | Path | None = None) -> dict[str, Any]:
"""Validate the committed portfolio demo evidence surface.

This is a pre-submission smoke check. It reads committed local fixtures and
documentation references only; it does not start Studio, run workers, or
mutate result/compare schemas.
"""

root = Path(repo_root) if repo_root is not None else Path.cwd()
summary = build_demo_evidence_summary()
checks: list[dict[str, Any]] = []

for relative_path in REQUIRED_PORTFOLIO_FILES:
path = root / relative_path
checks.append(
_check_item(
name=f"file:{relative_path}",
passed=path.is_file(),
details=str(path),
category="required_file",
)
)

runtime = summary["runtime_evidence"]
metric_sources = {
"onnxruntime_cpu": runtime["onnxruntime_cpu"],
"tensorrt_jetson_fp16_25w": runtime["tensorrt_jetson_fp16_25w"],
"tensorrt_jetson_fp16_15w": runtime["tensorrt_jetson_fp16_15w"],
"comparison": summary["comparison"],
"evaluation_report": summary["evaluation_report"],
}
for metric_path, expected in EXPECTED_PORTFOLIO_METRICS.items():
group, field = metric_path.split(".", 1)
observed = _number(metric_sources[group].get(field))
passed = _close_enough(observed, expected)
checks.append(
_check_item(
name=f"metric:{metric_path}",
passed=passed,
expected=expected,
observed=observed,
category="metric",
)
)

checks.append(
_check_item(
name="studio:in_memory_note",
passed=summary["in_memory_note"] == IN_MEMORY_NOTE,
expected=IN_MEMORY_NOTE,
observed=summary["in_memory_note"],
category="contract_note",
)
)
checks.append(
_check_item(
name="decision:lab_owner_review_required",
passed=summary["deployment_decision"].get("decision") == "review_required",
expected="review_required",
observed=summary["deployment_decision"].get("decision"),
category="deployment_decision",
)
)

problem_cases = {case["problem_case"] for case in summary["problem_cases"]}
expected_problem_cases = {
"annotation_missing",
"invalid_detection_structure",
"contract_shape_mismatch",
"latency_regression",
}
checks.append(
_check_item(
name="problem_cases:portfolio_bundle",
passed=problem_cases == expected_problem_cases,
expected=sorted(expected_problem_cases),
observed=sorted(problem_cases),
category="problem_case",
)
)

aiguard_cases = summary["aiguard_cases"]
guard_verdicts = {case["guard_verdict"] for case in aiguard_cases}
checks.append(
_check_item(
name="aiguard:portfolio_case_count",
passed=len(aiguard_cases) == 4,
expected=4,
observed=len(aiguard_cases),
category="aiguard",
)
)
checks.append(
_check_item(
name="aiguard:portfolio_verdicts",
passed={"pass", "blocked", "review_required"}.issubset(guard_verdicts),
expected=["blocked", "pass", "review_required"],
observed=sorted(guard_verdicts),
category="aiguard",
)
)

docs_checks = _documentation_reference_checks(root)
checks.extend(docs_checks)

failed = [check for check in checks if not check["passed"]]
return {
"schema_version": PORTFOLIO_CHECK_SCHEMA_VERSION,
"status": "pass" if not failed else "fail",
"generated_at": _utc_now_iso(),
"repo_root": str(root),
"check_count": len(checks),
"failed_count": len(failed),
"checks": checks,
"core_metrics": {
"tensorrt_jetson_fp16_25w_mean_ms": runtime["tensorrt_jetson_fp16_25w"]["mean_ms"],
"onnxruntime_cpu_mean_ms": runtime["onnxruntime_cpu"]["mean_ms"],
"speedup": summary["comparison"]["speedup"],
"tensorrt_fps": runtime["tensorrt_jetson_fp16_25w"]["fps"],
"onnxruntime_fps": runtime["onnxruntime_cpu"]["fps"],
"map50": summary["evaluation_report"]["map50"],
"ground_truth_boxes": summary["evaluation_report"]["ground_truth_boxes"],
},
"notes": [
IN_MEMORY_NOTE,
"This check validates committed portfolio evidence only; it does not run production workers or external queues.",
"Lab remains the deployment decision owner; AIGuard remains optional deterministic diagnosis evidence.",
],
}


def build_portfolio_demo_check_text(report: dict[str, Any] | None = None) -> str:
report = report or build_portfolio_demo_check()
metrics = report["core_metrics"]
lines = [
"InferEdgeLab Portfolio Demo Check",
f"status: {report['status']}",
f"checks: {report['check_count']} total / {report['failed_count']} failed",
f"TensorRT Jetson FP16 25W mean_ms: {_fmt_number(metrics['tensorrt_jetson_fp16_25w_mean_ms'])}",
f"ONNX Runtime CPU mean_ms: {_fmt_number(metrics['onnxruntime_cpu_mean_ms'])}",
f"speedup: {_fmt_number(metrics['speedup'])}x faster",
f"TensorRT FPS: {_fmt_number(metrics['tensorrt_fps'])}",
f"ONNX Runtime FPS: {_fmt_number(metrics['onnxruntime_fps'])}",
f"YOLOv8 subset mAP@50: {_fmt_number(metrics['map50'])}",
f"YOLOv8 subset GT boxes: {metrics['ground_truth_boxes']}",
]
failed = [check for check in report["checks"] if not check["passed"]]
if failed:
lines.append("")
lines.append("Failed checks:")
for check in failed:
lines.append(f"- {check['name']}: expected={check.get('expected')} observed={check.get('observed')}")
else:
lines.append("All portfolio demo evidence checks passed.")
lines.append("")
return "\n".join(lines)


def demo_evidence_summary_json(summary: dict[str, Any] | None = None) -> str:
return json.dumps(summary or build_demo_evidence_summary(), ensure_ascii=False, indent=2) + "\n"

@@ -365,6 +561,80 @@ def _power_mode_summary(summary: dict[str, Any]) -> dict[str, Any]:
}


def _documentation_reference_checks(root: Path) -> list[dict[str, Any]]:
required_snippets = {
"README.md": [
"10.066401",
"45.4299",
"4.51x",
"Local Studio",
"in-memory",
],
"docs/portfolio/final_validation_completion.md": [
"10.066401",
"45.4299",
"4.51x",
],
"docs/portfolio/runtime_compare_yolov8n.md": [
"10.066401",
"45.4299",
"4.51x",
],
"docs/portfolio/yolov8_coco_subset_evaluation.md": [
"0.1410",
"0.2941",
"0.1685",
],
}
checks: list[dict[str, Any]] = []
for relative_path, snippets in required_snippets.items():
path = root / relative_path
try:
text = path.read_text(encoding="utf-8")
except OSError:
text = ""
for snippet in snippets:
checks.append(
_check_item(
name=f"doc:{relative_path}:{snippet}",
passed=snippet in text,
expected=f"contains {snippet}",
observed="present" if snippet in text else "missing",
category="documentation_reference",
)
)
return checks


def _check_item(
*,
name: str,
passed: bool,
category: str,
expected: Any | None = None,
observed: Any | None = None,
details: str | None = None,
) -> dict[str, Any]:
item = {
"name": name,
"category": category,
"passed": bool(passed),
}
if expected is not None:
item["expected"] = expected
if observed is not None:
item["observed"] = observed
if details is not None:
item["details"] = details
return item


def _close_enough(observed: float | None, expected: float) -> bool:
if observed is None:
return False
return abs(observed - expected) <= max(1e-6, abs(expected) * 1e-6)


def _demo_ground_truth_box_count() -> int | None:
path = VALIDATION_DEMO_DIR / DEMO_ANNOTATIONS_FILE
try:
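For automation that consumes the JSON output, a minimal sketch that groups failed checks by category. The report keys (`status`, `checks`, `name`, `category`, `passed`, `expected`, `observed`, `failed_count`, `check_count`) come from `build_portfolio_demo_check` above; the `report.json` path is only an illustrative assumption.

```python
# Minimal sketch: post-process a saved JSON report produced with
# `poetry run inferedgelab portfolio-demo-check --format json > report.json`
# and group any failures by category. The report.json path is an example only.
import json
from collections import defaultdict
from pathlib import Path

report = json.loads(Path("report.json").read_text(encoding="utf-8"))

failures_by_category: dict[str, list[dict]] = defaultdict(list)
for check in report["checks"]:
    if not check["passed"]:
        failures_by_category[check["category"]].append(check)

print(f"status={report['status']} failed={report['failed_count']}/{report['check_count']}")
for category, failures in sorted(failures_by_category.items()):
    print(f"{category}: {len(failures)} failed")
    for check in failures:
        print(f"  - {check['name']}: expected={check.get('expected')} observed={check.get('observed')}")
```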
25 changes: 25 additions & 0 deletions tests/test_demo_evidence_report.py
@@ -6,10 +6,12 @@

from inferedgelab.commands.demo_evidence import demo_evidence_summary_cmd
from inferedgelab.commands.demo_evidence import export_demo_evidence_cmd
from inferedgelab.commands.demo_evidence import portfolio_demo_check_cmd
from inferedgelab.services.demo_evidence_report import (
IN_MEMORY_NOTE,
build_demo_evidence_markdown,
build_demo_evidence_summary,
build_portfolio_demo_check,
)


@@ -84,3 +86,26 @@ def test_export_demo_evidence_command_writes_markdown(tmp_path, capsys):
assert "Saved" in output
assert "# InferEdge Local Studio Demo Evidence Report" in markdown
assert IN_MEMORY_NOTE in markdown


def test_portfolio_demo_check_passes_for_committed_evidence():
report = build_portfolio_demo_check()

assert report["schema_version"] == "inferedgelab-portfolio-demo-check-v1"
assert report["status"] == "pass"
assert report["failed_count"] == 0
assert report["core_metrics"]["tensorrt_jetson_fp16_25w_mean_ms"] == pytest.approx(10.066401)
assert report["core_metrics"]["onnxruntime_cpu_mean_ms"] == pytest.approx(45.4299)
assert report["core_metrics"]["speedup"] == pytest.approx(4.513023, rel=1e-5)
assert any(check["name"] == "aiguard:portfolio_case_count" for check in report["checks"])
assert any(check["name"] == "problem_cases:portfolio_bundle" for check in report["checks"])


def test_portfolio_demo_check_command_outputs_json(capsys):
portfolio_demo_check_cmd(format="json", repo_root=".")
out = capsys.readouterr().out
report = json.loads(out)

assert report["schema_version"] == "inferedgelab-portfolio-demo-check-v1"
assert report["status"] == "pass"
assert report["failed_count"] == 0