11 changes: 11 additions & 0 deletions README.md
@@ -188,6 +188,17 @@ CLI / API → Service Layer → Structured Result → Compare / Report

---

## Contract-Based Validation

InferEdgeLab treats model evaluation as a **contract/preset-based validation workflow**, rather than claiming that any arbitrary model can be scored automatically without context.
`evaluate-detection` now supports the `yolov8_coco` preset, an optional `model_contract.json`, COCO annotations, YOLO txt labels, structural validation of detection outputs, and JSON/Markdown/HTML evaluation reports.
When no annotations are provided, accuracy is explicitly marked as `skipped` and the report records structural validation only.
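
The same preset/contract resolution that `evaluate-detection` performs can also be driven programmatically. A minimal sketch using the validation helpers behind the command (the fixture path is one of the demo files under `examples/validation_demo/`):

```python
from inferedgelab.validation.model_contract import (
    ModelContractError,
    build_default_contract,
    load_model_contract,
)
from inferedgelab.validation.presets import get_preset, supported_presets

preset_name = "yolov8_coco"
preset_def = get_preset(preset_name)

try:
    # An explicit contract file takes precedence over the preset default.
    contract = load_model_contract(
        "examples/validation_demo/yolov8_coco_model_contract.json",
        default_preset=preset_name,
    )
except (ValueError, ModelContractError) as exc:
    # Same error surface the CLI maps to a typer.BadParameter message.
    raise SystemExit(f"{exc} Supported presets: {', '.join(supported_presets())}")
```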

Planned presets such as `resnet_imagenet` and `custom_contract` keep future evaluation work scoped to explicit model contracts and dataset assumptions.
Small contract fixtures for the normal and problem demo cases live under `examples/validation_demo/`.

---

## Key Results (Real Hardware Validation)

InferEdgeLab was validated on real edge hardware using YOLOv8 models.
26 changes: 26 additions & 0 deletions examples/validation_demo/coco_minimal_annotations.json
@@ -0,0 +1,26 @@
{
"images": [
{
"id": 1,
"file_name": "sample.jpg",
"width": 640,
"height": 480
}
],
"categories": [
{
"id": 1,
"name": "person"
}
],
"annotations": [
{
"id": 1,
"image_id": 1,
"category_id": 1,
"bbox": [100.0, 120.0, 80.0, 60.0],
"area": 4800.0,
"iscrowd": 0
}
]
}
24 changes: 24 additions & 0 deletions examples/validation_demo/problem_model_contract.json
@@ -0,0 +1,24 @@
{
"contract_version": "1",
"task": "object_detection",
"preset": "yolov8_coco",
"input": {
"name": "images",
"shape": [1, 3, 320, 320],
"format": "NCHW_RGB_FLOAT32_0_1"
},
"output": {
"name": "output0",
"type": "yolov8_detection",
"shape": [1, 84, 8400],
"format": "tensor"
},
"thresholds": {
"score": 0.25,
"iou": 0.5
},
"metadata": {
"demo_case": "problem",
"expected_issue": "Input shape intentionally differs from the YOLOv8 COCO preset input size."
}
}
26 changes: 26 additions & 0 deletions examples/validation_demo/yolov8_coco_model_contract.json
@@ -0,0 +1,26 @@
{
"contract_version": "1",
"task": "object_detection",
"preset": "yolov8_coco",
"input": {
"name": "images",
"shape": [1, 3, 640, 640],
"format": "NCHW_RGB_FLOAT32_0_1",
"dtype": "float32"
},
"output": {
"name": "output0",
"type": "yolov8_detection",
"shape": [1, 84, 8400],
"format": "tensor",
"dtype": "float32"
},
"thresholds": {
"score": 0.25,
"iou": 0.5
},
"metadata": {
"demo_case": "normal",
"note": "Small contract fixture for contract/preset validation demos."
}
}
82 changes: 74 additions & 8 deletions inferedgelab/commands/evaluate_detection.py
@@ -19,19 +19,36 @@
from inferedgelab.result.saver import save_result
from inferedgelab.result.schema import BenchmarkResult
from inferedgelab.utils.system_info import collect_system_snapshot
from inferedgelab.validation.model_contract import (
ModelContractError,
build_default_contract,
load_model_contract,
)
from inferedgelab.validation.presets import get_preset, supported_presets
from inferedgelab.validation.report import build_evaluation_report, save_evaluation_report


def _exit_with_runtime_error(message: str) -> None:
rprint(f"[red]{message}[/red]")
raise typer.Exit(code=1)


def _option_string(value: object, default: str = "") -> str:
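# When the command function is invoked directly (not via the Typer CLI), option
# parameters may arrive as typer.OptionInfo objects rather than plain strings;
# fall back to their declared default in that case.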
if isinstance(value, str):
return value
option_default = getattr(value, "default", default)
return option_default if isinstance(option_default, str) else default


def evaluate_detection_cmd(
model_path: str = typer.Argument(..., help="Path to the ONNX model to evaluate"),
engine: str = typer.Option("tensorrt", "--engine", help="Inference engine to use"),
engine_path: str = typer.Option("", "--engine-path", help="Path to the runtime artifact"),
image_dir: str = typer.Option(..., "--image-dir", help="Directory of evaluation images"),
label_dir: str = typer.Option(..., "--label-dir", help="Directory of YOLO txt labels"),
label_dir: str = typer.Option("", "--label-dir", help="Directory of YOLO txt labels"),
coco_annotations: str = typer.Option("", "--coco-annotations", help="Path to a COCO annotation JSON"),
preset: str = typer.Option("yolov8_coco", "--preset", help="Validation preset name"),
model_contract: str = typer.Option("", "--model-contract", help="Path to model_contract.json"),
num_classes: int = typer.Option(1, "--num-classes", help="Number of classes"),
precision: str = typer.Option("fp16", "--precision", help="Precision metadata (fp32, fp16, int8)"),
conf_threshold: float = typer.Option(0.2, "--conf-threshold", help="confidence threshold"),
@@ -40,6 +57,9 @@ def evaluate_detection_cmd(
rgb: bool = typer.Option(True, "--rgb/--bgr", help="Use RGB input conversion after OpenCV read"),
debug_samples: int = typer.Option(0, "--debug-samples", help="Print internal debug output for the first N images"),
out_json: str = typer.Option("", "--out-json", help="Path to save the accuracy payload"),
report_json: str = typer.Option("", "--report-json", help="Path to save the evaluation report JSON"),
report_md: str = typer.Option("", "--report-md", help="Path to save the evaluation report Markdown"),
report_html: str = typer.Option("", "--report-html", help="Path to save the evaluation report HTML"),
out_dir: str = typer.Option("results", "--out-dir", help="Directory for saving structured results"),
save_structured_result: bool = typer.Option(
True,
@@ -64,6 +84,23 @@ def evaluate_detection_cmd(

if num_classes <= 0:
raise typer.BadParameter("--num-classes must be >= 1")
coco_annotations = _option_string(coco_annotations)
preset = _option_string(preset, "yolov8_coco")
model_contract = _option_string(model_contract)
report_json = _option_string(report_json)
report_md = _option_string(report_md)
report_html = _option_string(report_html)
preset = preset.strip().lower()
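# Resolve the preset definition and the model contract; an explicit
# --model-contract file takes precedence over the preset's default contract.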
try:
preset_def = get_preset(preset)
contract = (
load_model_contract(model_contract.strip(), default_preset=preset)
if model_contract.strip()
else build_default_contract(preset)
)
except (ValueError, ModelContractError) as exc:
supported = ", ".join(supported_presets())
raise typer.BadParameter(f"{exc} Supported presets: {supported}") from exc
if not isinstance(debug_samples, int):
debug_samples = int(getattr(debug_samples, "default", 0))
if debug_samples < 0:
@@ -75,7 +112,8 @@ def evaluate_detection_cmd(
engine_name=engine,
engine_path=engine_path.strip() or None,
image_dir=image_dir,
label_dir=label_dir,
label_dir=label_dir.strip() or None,
coco_annotations=coco_annotations.strip() or None,
num_classes=num_classes,
conf_threshold=conf_threshold,
nms_threshold=nms_threshold,
@@ -114,6 +152,9 @@ def evaluate_detection_cmd(
"task": "detection",
"engine": engine,
"engine_path": engine_path.strip() or None,
"preset": preset,
"model_contract_path": model_contract.strip() or None,
"coco_annotations": coco_annotations.strip() or None,
"num_classes": num_classes,
},
accuracy=accuracy_payload,
@@ -126,22 +167,47 @@ def evaluate_detection_cmd(
"evaluation_config": eval_result.evaluation_config,
"engine_path": engine_path.strip() or None,
"runtime_artifact_path": eval_result.extra.get("runtime_artifact_path"),
"structural_validation": eval_result.extra.get("structural_validation"),
"accuracy_status": eval_result.extra.get("accuracy_status", "evaluated"),
}
},
)
result_path = save_result(structured, out_dir=out_dir)

evaluation_report = build_evaluation_report(
eval_result=eval_result,
model_contract=contract,
preset=preset_def.to_dict(),
)
save_evaluation_report(
evaluation_report,
json_path=report_json,
markdown_path=report_md,
html_path=report_html,
)

rprint(f"Engine : {eval_result.engine}")
rprint(f"Images : {image_dir}")
rprint(f"Labels : {label_dir}")
rprint(f"Labels : {label_dir or '(not provided)'}")
rprint(f"COCO annotations: {coco_annotations or '(not provided)'}")
rprint(f"Samples : {eval_result.sample_count}")
rprint(f"Precision : {eval_result.metrics['precision']:.4f}")
rprint(f"Recall : {eval_result.metrics['recall']:.4f}")
rprint(f"F1 Score : {eval_result.metrics['f1_score']:.4f}")
rprint(f"mAP@50 : {eval_result.metrics['map50']:.4f}")
rprint(f"mAP@50-95 : {eval_result.metrics['map50_95']:.4f}")
rprint(f"Accuracy status : {eval_result.extra.get('accuracy_status', 'evaluated')}")
if eval_result.extra.get("accuracy_status") == "skipped":
rprint(f"Accuracy skipped: {eval_result.extra.get('accuracy_skip_reason')}")
else:
rprint(f"Precision : {eval_result.metrics['precision']:.4f}")
rprint(f"Recall : {eval_result.metrics['recall']:.4f}")
rprint(f"F1 Score : {eval_result.metrics['f1_score']:.4f}")
rprint(f"mAP@50 : {eval_result.metrics['map50']:.4f}")
rprint(f"mAP@50-95 : {eval_result.metrics['map50_95']:.4f}")

if saved_json_path:
rprint(f"[cyan]Saved accuracy[/cyan] : {saved_json_path}")
if report_json.strip():
rprint(f"[cyan]Saved evaluation JSON[/cyan]: {report_json}")
if report_md.strip():
rprint(f"[cyan]Saved evaluation Markdown[/cyan]: {report_md}")
if report_html.strip():
rprint(f"[cyan]Saved evaluation HTML[/cyan]: {report_html}")
if result_path:
rprint(f"[cyan]Saved structured result[/cyan]: {result_path}")