diff --git a/README.md b/README.md index 648cf27..de7ca9d 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,17 @@ CLI / API → Service Layer → Structured Result → Compare / Report --- +## Contract-Based Validation + +InferEdgeLab treats model evaluation as a **contract/preset-based validation workflow**, not as a claim that any arbitrary model can be automatically scored without context. +`evaluate-detection` now supports the `yolov8_coco` preset, optional `model_contract.json`, COCO annotations, YOLO txt labels, structural detection-output validation, and JSON/Markdown/HTML evaluation reports. +When annotations are not provided, accuracy is explicitly marked as `skipped` and the report records structural validation only. + +Planned presets such as `resnet_imagenet` and `custom_contract` keep future evaluation work scoped to explicit model contracts and dataset assumptions. +Small normal/problem contract fixtures live under `examples/validation_demo/`. + +--- + ## Key Results (Real Hardware Validation) InferEdgeLab was validated on real edge hardware using YOLOv8 models. diff --git a/examples/validation_demo/coco_minimal_annotations.json b/examples/validation_demo/coco_minimal_annotations.json new file mode 100644 index 0000000..7bf04bc --- /dev/null +++ b/examples/validation_demo/coco_minimal_annotations.json @@ -0,0 +1,26 @@ +{ + "images": [ + { + "id": 1, + "file_name": "sample.jpg", + "width": 640, + "height": 480 + } + ], + "categories": [ + { + "id": 1, + "name": "person" + } + ], + "annotations": [ + { + "id": 1, + "image_id": 1, + "category_id": 1, + "bbox": [100.0, 120.0, 80.0, 60.0], + "area": 4800.0, + "iscrowd": 0 + } + ] +} diff --git a/examples/validation_demo/problem_model_contract.json b/examples/validation_demo/problem_model_contract.json new file mode 100644 index 0000000..e98b96c --- /dev/null +++ b/examples/validation_demo/problem_model_contract.json @@ -0,0 +1,24 @@ +{ + "contract_version": "1", + "task": "object_detection", + "preset": "yolov8_coco", + "input": { + "name": "images", + "shape": [1, 3, 320, 320], + "format": "NCHW_RGB_FLOAT32_0_1" + }, + "output": { + "name": "output0", + "type": "yolov8_detection", + "shape": [1, 84, 8400], + "format": "tensor" + }, + "thresholds": { + "score": 0.25, + "iou": 0.5 + }, + "metadata": { + "demo_case": "problem", + "expected_issue": "Input shape intentionally differs from the YOLOv8 COCO preset input size." + } +} diff --git a/examples/validation_demo/yolov8_coco_model_contract.json b/examples/validation_demo/yolov8_coco_model_contract.json new file mode 100644 index 0000000..5162ba7 --- /dev/null +++ b/examples/validation_demo/yolov8_coco_model_contract.json @@ -0,0 +1,26 @@ +{ + "contract_version": "1", + "task": "object_detection", + "preset": "yolov8_coco", + "input": { + "name": "images", + "shape": [1, 3, 640, 640], + "format": "NCHW_RGB_FLOAT32_0_1", + "dtype": "float32" + }, + "output": { + "name": "output0", + "type": "yolov8_detection", + "shape": [1, 84, 8400], + "format": "tensor", + "dtype": "float32" + }, + "thresholds": { + "score": 0.25, + "iou": 0.5 + }, + "metadata": { + "demo_case": "normal", + "note": "Small contract fixture for contract/preset validation demos." 
+ } +} diff --git a/inferedgelab/commands/evaluate_detection.py b/inferedgelab/commands/evaluate_detection.py index dc4eca0..73b0379 100644 --- a/inferedgelab/commands/evaluate_detection.py +++ b/inferedgelab/commands/evaluate_detection.py @@ -19,6 +19,13 @@ from inferedgelab.result.saver import save_result from inferedgelab.result.schema import BenchmarkResult from inferedgelab.utils.system_info import collect_system_snapshot +from inferedgelab.validation.model_contract import ( + ModelContractError, + build_default_contract, + load_model_contract, +) +from inferedgelab.validation.presets import get_preset, supported_presets +from inferedgelab.validation.report import build_evaluation_report, save_evaluation_report def _exit_with_runtime_error(message: str) -> None: @@ -26,12 +33,22 @@ def _exit_with_runtime_error(message: str) -> None: raise typer.Exit(code=1) +def _option_string(value: object, default: str = "") -> str: + if isinstance(value, str): + return value + option_default = getattr(value, "default", default) + return option_default if isinstance(option_default, str) else default + + def evaluate_detection_cmd( model_path: str = typer.Argument(..., help="평가할 ONNX 모델 경로"), engine: str = typer.Option("tensorrt", "--engine", help="추론 엔진 선택"), engine_path: str = typer.Option("", "--engine-path", help="Runtime artifact 경로"), image_dir: str = typer.Option(..., "--image-dir", help="평가 이미지 디렉토리"), - label_dir: str = typer.Option(..., "--label-dir", help="YOLO txt 라벨 디렉토리"), + label_dir: str = typer.Option("", "--label-dir", help="YOLO txt 라벨 디렉토리"), + coco_annotations: str = typer.Option("", "--coco-annotations", help="COCO annotation JSON 경로"), + preset: str = typer.Option("yolov8_coco", "--preset", help="Validation preset 이름"), + model_contract: str = typer.Option("", "--model-contract", help="model_contract.json 경로"), num_classes: int = typer.Option(1, "--num-classes", help="클래스 수"), precision: str = typer.Option("fp16", "--precision", help="precision 메타데이터 (fp32, fp16, int8)"), conf_threshold: float = typer.Option(0.2, "--conf-threshold", help="confidence threshold"), @@ -40,6 +57,9 @@ def evaluate_detection_cmd( rgb: bool = typer.Option(True, "--rgb/--bgr", help="Use RGB input conversion after OpenCV read"), debug_samples: int = typer.Option(0, "--debug-samples", help="Print internal debug output for the first N images"), out_json: str = typer.Option("", "--out-json", help="Accuracy payload 저장 경로"), + report_json: str = typer.Option("", "--report-json", help="Evaluation report JSON 저장 경로"), + report_md: str = typer.Option("", "--report-md", help="Evaluation report Markdown 저장 경로"), + report_html: str = typer.Option("", "--report-html", help="Evaluation report HTML 저장 경로"), out_dir: str = typer.Option("results", "--out-dir", help="structured result 저장 디렉토리"), save_structured_result: bool = typer.Option( True, @@ -64,6 +84,23 @@ def evaluate_detection_cmd( if num_classes <= 0: raise typer.BadParameter("--num-classes must be >= 1") + coco_annotations = _option_string(coco_annotations) + preset = _option_string(preset, "yolov8_coco") + model_contract = _option_string(model_contract) + report_json = _option_string(report_json) + report_md = _option_string(report_md) + report_html = _option_string(report_html) + preset = preset.strip().lower() + try: + preset_def = get_preset(preset) + contract = ( + load_model_contract(model_contract.strip(), default_preset=preset) + if model_contract.strip() + else build_default_contract(preset) + ) + except (ValueError, ModelContractError) as exc: + 
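+        # Preset/contract failures surface as CLI parameter errors that also list the supported presets.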
supported = ", ".join(supported_presets()) + raise typer.BadParameter(f"{exc} Supported presets: {supported}") from exc if not isinstance(debug_samples, int): debug_samples = int(getattr(debug_samples, "default", 0)) if debug_samples < 0: @@ -75,7 +112,8 @@ def evaluate_detection_cmd( engine_name=engine, engine_path=engine_path.strip() or None, image_dir=image_dir, - label_dir=label_dir, + label_dir=label_dir.strip() or None, + coco_annotations=coco_annotations.strip() or None, num_classes=num_classes, conf_threshold=conf_threshold, nms_threshold=nms_threshold, @@ -114,6 +152,9 @@ def evaluate_detection_cmd( "task": "detection", "engine": engine, "engine_path": engine_path.strip() or None, + "preset": preset, + "model_contract_path": model_contract.strip() or None, + "coco_annotations": coco_annotations.strip() or None, "num_classes": num_classes, }, accuracy=accuracy_payload, @@ -126,22 +167,47 @@ def evaluate_detection_cmd( "evaluation_config": eval_result.evaluation_config, "engine_path": engine_path.strip() or None, "runtime_artifact_path": eval_result.extra.get("runtime_artifact_path"), + "structural_validation": eval_result.extra.get("structural_validation"), + "accuracy_status": eval_result.extra.get("accuracy_status", "evaluated"), } }, ) result_path = save_result(structured, out_dir=out_dir) + evaluation_report = build_evaluation_report( + eval_result=eval_result, + model_contract=contract, + preset=preset_def.to_dict(), + ) + save_evaluation_report( + evaluation_report, + json_path=report_json, + markdown_path=report_md, + html_path=report_html, + ) + rprint(f"Engine : {eval_result.engine}") rprint(f"Images : {image_dir}") - rprint(f"Labels : {label_dir}") + rprint(f"Labels : {label_dir or '(not provided)'}") + rprint(f"COCO annotations: {coco_annotations or '(not provided)'}") rprint(f"Samples : {eval_result.sample_count}") - rprint(f"Precision : {eval_result.metrics['precision']:.4f}") - rprint(f"Recall : {eval_result.metrics['recall']:.4f}") - rprint(f"F1 Score : {eval_result.metrics['f1_score']:.4f}") - rprint(f"mAP@50 : {eval_result.metrics['map50']:.4f}") - rprint(f"mAP@50-95 : {eval_result.metrics['map50_95']:.4f}") + rprint(f"Accuracy status : {eval_result.extra.get('accuracy_status', 'evaluated')}") + if eval_result.extra.get("accuracy_status") == "skipped": + rprint(f"Accuracy skipped: {eval_result.extra.get('accuracy_skip_reason')}") + else: + rprint(f"Precision : {eval_result.metrics['precision']:.4f}") + rprint(f"Recall : {eval_result.metrics['recall']:.4f}") + rprint(f"F1 Score : {eval_result.metrics['f1_score']:.4f}") + rprint(f"mAP@50 : {eval_result.metrics['map50']:.4f}") + rprint(f"mAP@50-95 : {eval_result.metrics['map50_95']:.4f}") if saved_json_path: rprint(f"[cyan]Saved accuracy[/cyan] : {saved_json_path}") + if report_json.strip(): + rprint(f"[cyan]Saved evaluation JSON[/cyan]: {report_json}") + if report_md.strip(): + rprint(f"[cyan]Saved evaluation Markdown[/cyan]: {report_md}") + if report_html.strip(): + rprint(f"[cyan]Saved evaluation HTML[/cyan]: {report_html}") if result_path: rprint(f"[cyan]Saved structured result[/cyan]: {result_path}") diff --git a/inferedgelab/core/detection_evaluator.py b/inferedgelab/core/detection_evaluator.py index 834f048..9ade7b5 100644 --- a/inferedgelab/core/detection_evaluator.py +++ b/inferedgelab/core/detection_evaluator.py @@ -10,6 +10,8 @@ from inferedgelab.engines.base import EngineModelIO from inferedgelab.engines.registry import create_engine, normalize_engine_name +from inferedgelab.validation.coco import 
load_coco_ground_truths +from inferedgelab.validation.structural import validate_detection_structure @dataclass @@ -750,9 +752,11 @@ def compute_precision_recall_f1( def build_accuracy_payload(eval_result: DetectionEvalResult) -> dict[str, Any]: return { "task": "detection", + "status": eval_result.extra.get("accuracy_status", "evaluated"), "metrics": dict(eval_result.metrics), "dataset": dict(eval_result.dataset), "evaluation_config": dict(eval_result.evaluation_config), + "notes": list(eval_result.notes), } @@ -797,7 +801,8 @@ def evaluate_detection_engine( engine_name: str, engine_path: str | None, image_dir: str, - label_dir: str, + label_dir: str | None = None, + coco_annotations: str | None = None, num_classes: int = 1, conf_threshold: float = 0.2, nms_threshold: float = 0.45, @@ -821,6 +826,14 @@ def evaluate_detection_engine( model_input = engine.inputs[0] image_files = get_image_files(image_dir) + coco_ground_truths = load_coco_ground_truths(coco_annotations) if coco_annotations else {} + accuracy_status = "evaluated" if label_dir or coco_annotations else "skipped" + accuracy_skip_reason = "" + if accuracy_status == "skipped": + accuracy_skip_reason = ( + "No YOLO label directory or COCO annotation file was provided; " + "only output structure was validated." + ) predictions_by_image: list[list[Detection]] = [] ground_truths_by_image: list[list[GroundTruth]] = [] @@ -858,12 +871,19 @@ def evaluate_detection_engine( debug=postprocess_debug, ) - label_path = os.path.join(label_dir, f"{Path(image_path).stem}.txt") - ground_truths = load_ground_truth( - label_path, - image_width=original_width, - image_height=original_height, - ) + ground_truths: list[GroundTruth] = [] + if label_dir: + label_path = os.path.join(label_dir, f"{Path(image_path).stem}.txt") + ground_truths = load_ground_truth( + label_path, + image_width=original_width, + image_height=original_height, + ) + elif coco_annotations: + ground_truths = [ + GroundTruth(class_id=item.class_id, box=item.box) + for item in coco_ground_truths.get(Path(image_path).name, []) + ] predictions_by_image.append(detections) ground_truths_by_image.append(ground_truths) @@ -886,32 +906,40 @@ def evaluate_detection_engine( ) ) - precision, recall, f1_score = compute_precision_recall_f1( + structural_validation = validate_detection_structure( predictions_by_image, - ground_truths_by_image, num_classes=num_classes, - iou_threshold=iou_threshold, ) - map50 = compute_average_precision( - predictions_by_image, - ground_truths_by_image, - num_classes=num_classes, - iou_threshold=0.5, - ) - map_thresholds = np.arange(0.5, 1.0, 0.05) - map50_95 = float( - np.mean( - [ - compute_average_precision( - predictions_by_image, - ground_truths_by_image, - num_classes=num_classes, - iou_threshold=float(threshold), - ) - for threshold in map_thresholds - ] + + if accuracy_status == "evaluated": + precision, recall, f1_score = compute_precision_recall_f1( + predictions_by_image, + ground_truths_by_image, + num_classes=num_classes, + iou_threshold=iou_threshold, ) - ) + map50 = compute_average_precision( + predictions_by_image, + ground_truths_by_image, + num_classes=num_classes, + iou_threshold=0.5, + ) + map_thresholds = np.arange(0.5, 1.0, 0.05) + map50_95 = float( + np.mean( + [ + compute_average_precision( + predictions_by_image, + ground_truths_by_image, + num_classes=num_classes, + iou_threshold=float(threshold), + ) + for threshold in map_thresholds + ] + ) + ) + else: + precision = recall = f1_score = map50 = map50_95 = 0.0 return DetectionEvalResult( 
task="detection", @@ -926,9 +954,10 @@ def evaluate_detection_engine( "recall": recall, }, notes=[ - "Detection evaluation uses YOLO txt labels and image directory traversal.", + "Detection evaluation uses image directory traversal.", "YOLOv8 postprocessing supports single-output and split boxes/scores output layouts.", - "Primary detection accuracy metric for compare/enrich reuse is map50.", + "Accuracy uses YOLO txt labels or COCO annotations when provided.", + "When annotations are missing, InferEdge records accuracy_skipped and structural validation only.", ], model_input={ "name": model_input.name, @@ -939,7 +968,9 @@ def evaluate_detection_engine( dataset={ "image_dir": image_dir, "label_dir": label_dir, + "coco_annotations": coco_annotations, "sample_count": len(image_files), + "accuracy_status": accuracy_status, }, evaluation_config={ "conf_threshold": conf_threshold, @@ -953,6 +984,9 @@ def evaluate_detection_engine( "engine_path": engine_path, "runtime_artifact_path": getattr(engine.runtime_paths, "runtime_artifact_path", None), "image_files": image_files, + "accuracy_status": accuracy_status, + "accuracy_skip_reason": accuracy_skip_reason, + "structural_validation": structural_validation, }, ) finally: diff --git a/inferedgelab/validation/__init__.py b/inferedgelab/validation/__init__.py new file mode 100644 index 0000000..08ce242 --- /dev/null +++ b/inferedgelab/validation/__init__.py @@ -0,0 +1,14 @@ +"""Validation contract, preset, and report helpers for InferEdgeLab.""" + +from inferedgelab.validation.model_contract import ModelContract, load_model_contract, parse_model_contract +from inferedgelab.validation.presets import get_preset, supported_presets +from inferedgelab.validation.report import build_evaluation_report + +__all__ = [ + "ModelContract", + "build_evaluation_report", + "get_preset", + "load_model_contract", + "parse_model_contract", + "supported_presets", +] diff --git a/inferedgelab/validation/coco.py b/inferedgelab/validation/coco.py new file mode 100644 index 0000000..a17f9ec --- /dev/null +++ b/inferedgelab/validation/coco.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from dataclasses import dataclass +import json +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class CocoGroundTruth: + image_id: int + file_name: str + class_id: int + box: tuple[float, float, float, float] + + +def load_coco_ground_truths(path: str) -> dict[str, list[CocoGroundTruth]]: + payload = json.loads(Path(path).read_text(encoding="utf-8")) + if not isinstance(payload, dict): + raise ValueError("COCO annotations must be a JSON object.") + + images = payload.get("images") + annotations = payload.get("annotations") + categories = payload.get("categories", []) + if not isinstance(images, list) or not isinstance(annotations, list): + raise ValueError("COCO annotations require images and annotations arrays.") + + image_by_id: dict[int, str] = {} + for image in images: + if not isinstance(image, dict): + continue + image_id = int(image["id"]) + image_by_id[image_id] = str(image["file_name"]) + + category_to_class = _category_to_zero_based_class(categories) + result: dict[str, list[CocoGroundTruth]] = {} + for annotation in annotations: + item = _parse_annotation(annotation, image_by_id=image_by_id, category_to_class=category_to_class) + if item is None: + continue + result.setdefault(Path(item.file_name).name, []).append(item) + return result + + +def _category_to_zero_based_class(categories: Any) -> dict[int, int]: + if not 
isinstance(categories, list) or not categories: + return {} + ids = sorted(int(category["id"]) for category in categories if isinstance(category, dict) and "id" in category) + return {category_id: index for index, category_id in enumerate(ids)} + + +def _parse_annotation( + annotation: Any, + *, + image_by_id: dict[int, str], + category_to_class: dict[int, int], +) -> CocoGroundTruth | None: + if not isinstance(annotation, dict): + return None + if annotation.get("iscrowd", 0): + return None + + image_id = int(annotation["image_id"]) + file_name = image_by_id.get(image_id) + if not file_name: + return None + + bbox = annotation.get("bbox") + if not isinstance(bbox, list) or len(bbox) != 4: + return None + x, y, width, height = (float(value) for value in bbox) + category_id = int(annotation["category_id"]) + class_id = category_to_class.get(category_id, category_id - 1) + return CocoGroundTruth( + image_id=image_id, + file_name=file_name, + class_id=class_id, + box=(x + width / 2.0, y + height / 2.0, width, height), + ) diff --git a/inferedgelab/validation/model_contract.py b/inferedgelab/validation/model_contract.py new file mode 100644 index 0000000..d37ff3d --- /dev/null +++ b/inferedgelab/validation/model_contract.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +import json +from pathlib import Path +from typing import Any + +from inferedgelab.validation.presets import get_preset + + +class ModelContractError(ValueError): + pass + + +@dataclass(frozen=True) +class ModelContractIO: + shape: list[int] + format: str + name: str | None = None + dtype: str | None = None + type: str | None = None + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +@dataclass(frozen=True) +class ModelContract: + contract_version: str + task: str + preset: str + labels: list[str] + input: ModelContractIO + output: ModelContractIO + thresholds: dict[str, float] + metadata: dict[str, Any] + + def to_dict(self) -> dict[str, Any]: + return { + "contract_version": self.contract_version, + "task": self.task, + "preset": self.preset, + "labels": list(self.labels), + "input": self.input.to_dict(), + "output": self.output.to_dict(), + "thresholds": dict(self.thresholds), + "metadata": dict(self.metadata), + } + + +def build_default_contract(preset_name: str = "yolov8_coco") -> ModelContract: + preset = get_preset(preset_name) + return ModelContract( + contract_version="1", + task=preset.task, + preset=preset.name, + labels=list(preset.labels), + input=ModelContractIO(shape=list(preset.input_shape), format=preset.input_format), + output=ModelContractIO( + shape=list(preset.output_shape), + format="tensor", + type=preset.output_type, + ), + thresholds=dict(preset.thresholds), + metadata={"source": "preset", "description": preset.description}, + ) + + +def parse_model_contract(payload: dict[str, Any], *, default_preset: str = "yolov8_coco") -> ModelContract: + if not isinstance(payload, dict): + raise ModelContractError("model_contract payload must be a JSON object.") + + preset_name = str(payload.get("preset") or default_preset).strip().lower() + preset = get_preset(preset_name) + default_contract = build_default_contract(preset.name) + + input_payload = payload.get("input") or {} + output_payload = payload.get("output") or {} + if not isinstance(input_payload, dict): + raise ModelContractError("model_contract.input must be an object.") + if not isinstance(output_payload, dict): + raise ModelContractError("model_contract.output must be an object.") + + 
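+    # Labels, thresholds, and shapes omitted from the contract fall back to the preset defaults.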
labels = payload.get("labels", default_contract.labels) + if labels is None: + labels = [] + if not isinstance(labels, list) or not all(isinstance(label, str) for label in labels): + raise ModelContractError("model_contract.labels must be a list of strings.") + + thresholds = payload.get("thresholds", default_contract.thresholds) + if not isinstance(thresholds, dict): + raise ModelContractError("model_contract.thresholds must be an object.") + + input_shape = input_payload.get("shape", default_contract.input.shape) + output_shape = output_payload.get("shape", default_contract.output.shape) + _validate_shape(input_shape, "input.shape") + _validate_shape(output_shape, "output.shape") + + task = str(payload.get("task") or preset.task) + if preset.name != "custom_contract" and task != preset.task: + raise ModelContractError(f"model_contract.task '{task}' does not match preset '{preset.name}'.") + + return ModelContract( + contract_version=str(payload.get("contract_version") or default_contract.contract_version), + task=task, + preset=preset.name, + labels=list(labels), + input=ModelContractIO( + shape=[int(value) for value in input_shape], + format=str(input_payload.get("format") or default_contract.input.format), + name=_optional_string(input_payload.get("name")), + dtype=_optional_string(input_payload.get("dtype")), + ), + output=ModelContractIO( + shape=[int(value) for value in output_shape], + format=str(output_payload.get("format") or default_contract.output.format), + name=_optional_string(output_payload.get("name")), + dtype=_optional_string(output_payload.get("dtype")), + type=str(output_payload.get("type") or default_contract.output.type), + ), + thresholds={str(key): float(value) for key, value in thresholds.items()}, + metadata=dict(payload.get("metadata") or {}), + ) + + +def load_model_contract(path: str, *, default_preset: str = "yolov8_coco") -> ModelContract: + contract_path = Path(path) + try: + payload = json.loads(contract_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise ModelContractError(f"model_contract is not valid JSON: {path}") from exc + return parse_model_contract(payload, default_preset=default_preset) + + +def _validate_shape(shape: Any, field_name: str) -> None: + if not isinstance(shape, list) or not shape: + raise ModelContractError(f"model_contract.{field_name} must be a non-empty list.") + if not all(isinstance(value, int) and value > 0 for value in shape): + raise ModelContractError(f"model_contract.{field_name} must contain positive integers.") + + +def _optional_string(value: Any) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None diff --git a/inferedgelab/validation/presets.py b/inferedgelab/validation/presets.py new file mode 100644 index 0000000..e9e5cf4 --- /dev/null +++ b/inferedgelab/validation/presets.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Any + + +@dataclass(frozen=True) +class ValidationPreset: + name: str + task: str + description: str + input_shape: list[int] + input_format: str + output_type: str + output_shape: list[int] + labels: list[str] + thresholds: dict[str, float] + accuracy: dict[str, Any] + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +COCO80_LABELS = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + 
"dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] + + +_PRESETS: dict[str, ValidationPreset] = { + "yolov8_coco": ValidationPreset( + name="yolov8_coco", + task="object_detection", + description="YOLOv8 object detection on COCO-style labels.", + input_shape=[1, 3, 640, 640], + input_format="NCHW_RGB_FLOAT32_0_1", + output_type="yolov8_detection", + output_shape=[1, 84, 8400], + labels=COCO80_LABELS, + thresholds={"score": 0.25, "iou": 0.5}, + accuracy={ + "primary_metric": "map50", + "secondary_metrics": ["precision", "recall", "f1_score", "map50_95"], + "annotation_formats": ["coco", "yolo_txt"], + }, + ), + "resnet_imagenet": ValidationPreset( + name="resnet_imagenet", + task="classification", + description="ImageNet classification contract placeholder.", + input_shape=[1, 3, 224, 224], + input_format="NCHW_RGB_FLOAT32_0_1", + output_type="classification_logits", + output_shape=[1, 1000], + labels=[], + thresholds={"top1_min": 0.0, "top5_min": 0.0}, + accuracy={ + "primary_metric": "top1", + "secondary_metrics": ["top5"], + "annotation_formats": ["imagenet_folder", "custom_contract"], + }, + ), + "custom_contract": ValidationPreset( + name="custom_contract", + task="custom", + description="Custom validation requires an explicit model_contract.json.", + input_shape=[], + input_format="custom", + output_type="custom", + output_shape=[], + labels=[], + thresholds={}, + accuracy={ + "primary_metric": "contract_defined", + "secondary_metrics": [], + "annotation_formats": ["custom_contract"], + }, + ), +} + + +def supported_presets() -> list[str]: + return sorted(_PRESETS) + + +def get_preset(name: str) -> ValidationPreset: + key = name.strip().lower() + if key not in _PRESETS: + supported = ", ".join(supported_presets()) + raise ValueError(f"Unsupported validation preset: {name}. 
Supported presets: {supported}") + return _PRESETS[key] diff --git a/inferedgelab/validation/report.py b/inferedgelab/validation/report.py new file mode 100644 index 0000000..693f672 --- /dev/null +++ b/inferedgelab/validation/report.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from datetime import datetime +import html +import json +from pathlib import Path +from typing import Any + +from inferedgelab.validation.model_contract import ModelContract +from inferedgelab.validation.structural import validate_shape + + +def build_evaluation_report( + *, + eval_result: Any, + model_contract: ModelContract, + preset: dict[str, Any], + latency_summary: dict[str, Any] | None = None, +) -> dict[str, Any]: + accuracy_status = str(eval_result.extra.get("accuracy_status") or "evaluated") + structural_validation = dict(eval_result.extra.get("structural_validation") or {}) + contract_validation = { + "input_shape": validate_shape(eval_result.actual_input_shape, model_contract.input.shape) + if eval_result.actual_input_shape and model_contract.input.shape + else {"status": "not_checked"}, + "preset": model_contract.preset, + "task": model_contract.task, + } + return { + "report_role": "inferedge-evaluation-report", + "generated_at": datetime.utcnow().isoformat(timespec="seconds") + "Z", + "preset": preset, + "model_contract": model_contract.to_dict(), + "runtime_result": { + "engine": eval_result.engine, + "device": eval_result.device, + "sample_count": eval_result.sample_count, + "model_input": eval_result.model_input, + "actual_input_shape": eval_result.actual_input_shape, + }, + "accuracy": { + "status": accuracy_status, + "metrics": dict(eval_result.metrics), + "reason": eval_result.extra.get("accuracy_skip_reason") if accuracy_status == "skipped" else None, + }, + "contract_validation": contract_validation, + "structural_validation": structural_validation, + "latency_summary": latency_summary or {"status": "not_provided"}, + "deployment_signal": _deployment_signal(accuracy_status, structural_validation, contract_validation), + "notes": list(eval_result.notes), + } + + +def save_evaluation_report(report: dict[str, Any], *, json_path: str = "", markdown_path: str = "", html_path: str = "") -> None: + if json_path.strip(): + _write_text(json_path, json.dumps(report, ensure_ascii=False, indent=2) + "\n") + if markdown_path.strip(): + _write_text(markdown_path, render_evaluation_markdown(report)) + if html_path.strip(): + _write_text(html_path, render_evaluation_html(report)) + + +def render_evaluation_markdown(report: dict[str, Any]) -> str: + accuracy = report["accuracy"] + structural = report.get("structural_validation") or {} + contract_validation = report.get("contract_validation") or {} + input_shape = contract_validation.get("input_shape") or {} + signal = report["deployment_signal"] + lines = [ + "# InferEdge Evaluation Report", + "", + f"- preset: `{report['preset']['name']}`", + f"- engine: `{report['runtime_result']['engine']}`", + f"- device: `{report['runtime_result']['device']}`", + f"- samples: `{report['runtime_result']['sample_count']}`", + f"- accuracy status: `{accuracy['status']}`", + f"- contract input shape: `{input_shape.get('status', 'unknown')}`", + f"- structural validation: `{structural.get('status', 'unknown')}`", + f"- deployment signal: `{signal['decision']}`", + "", + "## Metrics", + ] + if accuracy["status"] == "skipped": + lines.append(f"- accuracy skipped reason: {accuracy.get('reason') or 'not provided'}") + for key, value in accuracy.get("metrics", 
{}).items():
+        lines.append(f"- {key}: `{value}`")
+    lines.extend(["", "## Notes"])
+    for note in report.get("notes", []):
+        lines.append(f"- {note}")
+    return "\n".join(lines) + "\n"
+
+
+def render_evaluation_html(report: dict[str, Any]) -> str:
+    markdown = render_evaluation_markdown(report)
+    escaped = html.escape(markdown)
+    return (
+        "<!DOCTYPE html>\n"
+        "<html><head><title>InferEdge Evaluation Report</title></head>\n"
+        "<body><pre>\n"
+        f"{escaped}"
+        "</pre></body></html>
\n" + ) + + +def _deployment_signal( + accuracy_status: str, + structural_validation: dict[str, Any], + contract_validation: dict[str, Any], +) -> dict[str, str]: + if (contract_validation.get("input_shape") or {}).get("status") == "mismatch": + return { + "decision": "blocked", + "reason": "Actual runtime input shape does not match the model contract.", + } + if structural_validation.get("status") == "failed": + return { + "decision": "blocked", + "reason": "Structural validation found invalid detection output.", + } + if accuracy_status == "skipped": + return { + "decision": "review", + "reason": "Accuracy evaluation was skipped because annotations were not provided.", + } + return { + "decision": "review", + "reason": "Accuracy evidence is available; compare and deployment policy still decide release.", + } + + +def _write_text(path: str, text: str) -> None: + out_path = Path(path) + if out_path.parent != Path("."): + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(text, encoding="utf-8") diff --git a/inferedgelab/validation/structural.py b/inferedgelab/validation/structural.py new file mode 100644 index 0000000..3110f17 --- /dev/null +++ b/inferedgelab/validation/structural.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import math +from typing import Any, Sequence + + +def validate_detection_structure( + detections_by_image: Sequence[Sequence[Any]], + *, + num_classes: int | None = None, +) -> dict[str, Any]: + issues: list[dict[str, Any]] = [] + detection_count = 0 + + for image_index, detections in enumerate(detections_by_image): + for detection_index, detection in enumerate(detections): + detection_count += 1 + class_id = int(getattr(detection, "class_id", -1)) + confidence = float(getattr(detection, "confidence", float("nan"))) + box = tuple(float(value) for value in getattr(detection, "box", ())) + + if num_classes is not None and not 0 <= class_id < num_classes: + issues.append( + _issue(image_index, detection_index, "class_id_out_of_range", class_id) + ) + if not math.isfinite(confidence) or not 0.0 <= confidence <= 1.0: + issues.append(_issue(image_index, detection_index, "score_out_of_range", confidence)) + if len(box) != 4: + issues.append(_issue(image_index, detection_index, "bbox_not_xywh", list(box))) + continue + if not all(math.isfinite(value) for value in box): + issues.append(_issue(image_index, detection_index, "bbox_non_finite", list(box))) + if box[2] <= 0.0 or box[3] <= 0.0: + issues.append(_issue(image_index, detection_index, "bbox_non_positive_size", list(box))) + + return { + "status": "passed" if not issues else "failed", + "checked": { + "image_count": len(detections_by_image), + "detection_count": detection_count, + "num_classes": num_classes, + }, + "issues": issues, + } + + +def validate_shape(actual_shape: Sequence[int], expected_shape: Sequence[int]) -> dict[str, Any]: + actual = [int(value) for value in actual_shape] + expected = [int(value) for value in expected_shape] + return { + "status": "passed" if actual == expected else "mismatch", + "actual_shape": actual, + "expected_shape": expected, + } + + +def _issue(image_index: int, detection_index: int, code: str, value: Any) -> dict[str, Any]: + return { + "image_index": image_index, + "detection_index": detection_index, + "code": code, + "value": value, + } diff --git a/tests/fixtures/validation/coco_minimal.json b/tests/fixtures/validation/coco_minimal.json new file mode 100644 index 0000000..7fc0b4a --- /dev/null +++ b/tests/fixtures/validation/coco_minimal.json 
@@ -0,0 +1,26 @@ +{ + "images": [ + { + "id": 1, + "file_name": "sample.jpg", + "width": 640, + "height": 480 + } + ], + "categories": [ + { + "id": 1, + "name": "person" + } + ], + "annotations": [ + { + "id": 10, + "image_id": 1, + "category_id": 1, + "bbox": [100.0, 120.0, 80.0, 60.0], + "area": 4800.0, + "iscrowd": 0 + } + ] +} diff --git a/tests/test_evaluate_detection.py b/tests/test_evaluate_detection.py index 750a72a..9adc24e 100644 --- a/tests/test_evaluate_detection.py +++ b/tests/test_evaluate_detection.py @@ -273,6 +273,79 @@ def fake_save_result(result, out_dir="results"): assert captured["engine_kwargs"]["debug_samples"] == 0 +def test_evaluate_detection_command_writes_contract_evaluation_report(tmp_path, monkeypatch): + from inferedgelab.commands import evaluate_detection + + captured = {} + + def fake_evaluate_detection_engine(**kwargs): + captured["engine_kwargs"] = kwargs + return DetectionEvalResult( + task="detection", + engine="onnxruntime", + device="cpu", + sample_count=1, + metrics={ + "map50": 0.0, + "map50_95": 0.0, + "f1_score": 0.0, + "precision": 0.0, + "recall": 0.0, + }, + notes=["structural validation only"], + model_input={"name": "images", "dtype": "float32", "shape": [1, 3, 640, 640]}, + actual_input_shape=[1, 3, 640, 640], + dataset={"image_dir": "images", "label_dir": None, "sample_count": 1}, + evaluation_config={ + "conf_threshold": 0.2, + "nms_threshold": 0.45, + "iou_threshold": 0.5, + "input_size": 640, + "rgb": True, + }, + extra={ + "accuracy_status": "skipped", + "accuracy_skip_reason": "No annotations were provided.", + "structural_validation": {"status": "passed", "issues": []}, + }, + ) + + monkeypatch.setattr(evaluate_detection, "evaluate_detection_engine", fake_evaluate_detection_engine) + + report_json = tmp_path / "evaluation.json" + report_md = tmp_path / "evaluation.md" + evaluate_detection.evaluate_detection_cmd( + model_path="models/onnx/yolov8n.onnx", + engine="onnxruntime", + engine_path="", + image_dir="images", + label_dir="", + coco_annotations="", + preset="yolov8_coco", + model_contract="", + num_classes=80, + precision="fp32", + conf_threshold=0.2, + nms_threshold=0.45, + iou_threshold=0.5, + rgb=True, + debug_samples=0, + out_json="", + report_json=str(report_json), + report_md=str(report_md), + report_html="", + out_dir=str(tmp_path / "results"), + save_structured_result=False, + ) + + report = json.loads(report_json.read_text(encoding="utf-8")) + assert captured["engine_kwargs"]["label_dir"] is None + assert captured["engine_kwargs"]["coco_annotations"] is None + assert report["model_contract"]["preset"] == "yolov8_coco" + assert report["accuracy"]["status"] == "skipped" + assert "accuracy skipped reason" in report_md.read_text(encoding="utf-8") + + def test_evaluate_detection_engine_debug_path_prints_sample_diagnostics(tmp_path, monkeypatch, capsys): image_dir = tmp_path / "images" label_dir = tmp_path / "labels" @@ -365,6 +438,9 @@ def test_evaluate_detection_help_shows_debug_samples_option(): assert result.exit_code == 0 assert "--debug-samples" in result.stdout + assert "--model-contract" in result.stdout + assert "--preset" in result.stdout + assert "--coco-annotations" in result.stdout def test_cli_help_registers_evaluate_detection_command(): diff --git a/tests/test_validation_contracts.py b/tests/test_validation_contracts.py new file mode 100644 index 0000000..70eac92 --- /dev/null +++ b/tests/test_validation_contracts.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +from pathlib import Path + +import 
pytest + +from inferedgelab.core.detection_evaluator import Detection, DetectionEvalResult +from inferedgelab.validation.coco import load_coco_ground_truths +from inferedgelab.validation.model_contract import ( + ModelContractError, + build_default_contract, + load_model_contract, + parse_model_contract, +) +from inferedgelab.validation.presets import get_preset, supported_presets +from inferedgelab.validation.report import build_evaluation_report, render_evaluation_markdown +from inferedgelab.validation.structural import validate_detection_structure, validate_shape + + +def test_yolov8_coco_preset_builds_default_model_contract(): + preset = get_preset("yolov8_coco") + contract = build_default_contract("yolov8_coco") + + assert "yolov8_coco" in supported_presets() + assert preset.task == "object_detection" + assert contract.preset == "yolov8_coco" + assert contract.input.shape == [1, 3, 640, 640] + assert contract.output.type == "yolov8_detection" + assert len(contract.labels) == 80 + + +def test_parse_model_contract_rejects_preset_task_mismatch(): + with pytest.raises(ModelContractError): + parse_model_contract( + { + "preset": "yolov8_coco", + "task": "classification", + "input": {"shape": [1, 3, 640, 640]}, + "output": {"shape": [1, 84, 8400]}, + } + ) + + +def test_example_validation_demo_contracts_are_parseable(): + repo_root = Path(__file__).resolve().parents[1] + + normal = load_model_contract(str(repo_root / "examples" / "validation_demo" / "yolov8_coco_model_contract.json")) + problem = load_model_contract(str(repo_root / "examples" / "validation_demo" / "problem_model_contract.json")) + + assert normal.metadata["demo_case"] == "normal" + assert problem.metadata["demo_case"] == "problem" + assert problem.input.shape == [1, 3, 320, 320] + + +def test_load_coco_ground_truths_maps_annotations_by_file_name(): + fixture = Path(__file__).parent / "fixtures" / "validation" / "coco_minimal.json" + + ground_truths = load_coco_ground_truths(str(fixture)) + + assert list(ground_truths) == ["sample.jpg"] + assert ground_truths["sample.jpg"][0].class_id == 0 + assert ground_truths["sample.jpg"][0].box == pytest.approx((140.0, 150.0, 80.0, 60.0)) + + +def test_structural_validation_detects_invalid_detection_fields(): + result = validate_detection_structure( + [[Detection(class_id=99, confidence=1.2, box=(10.0, 10.0, -5.0, 5.0))]], + num_classes=3, + ) + + assert result["status"] == "failed" + assert {issue["code"] for issue in result["issues"]} == { + "class_id_out_of_range", + "score_out_of_range", + "bbox_non_positive_size", + } + + +def test_shape_validation_reports_mismatch(): + result = validate_shape([1, 3, 320, 320], [1, 3, 640, 640]) + + assert result["status"] == "mismatch" + + +def test_evaluation_report_marks_missing_annotations_as_accuracy_skipped(): + eval_result = DetectionEvalResult( + task="detection", + engine="onnxruntime", + device="cpu", + sample_count=1, + metrics={ + "map50": 0.0, + "map50_95": 0.0, + "f1_score": 0.0, + "precision": 0.0, + "recall": 0.0, + }, + notes=["structural validation only"], + model_input={"name": "images", "dtype": "float32", "shape": [1, 3, 640, 640]}, + actual_input_shape=[1, 3, 640, 640], + dataset={"image_dir": "images", "sample_count": 1, "accuracy_status": "skipped"}, + evaluation_config={"input_size": 640}, + extra={ + "accuracy_status": "skipped", + "accuracy_skip_reason": "annotations missing", + "structural_validation": {"status": "passed", "issues": []}, + }, + ) + + report = build_evaluation_report( + eval_result=eval_result, + 
model_contract=build_default_contract("yolov8_coco"), + preset=get_preset("yolov8_coco").to_dict(), + ) + markdown = render_evaluation_markdown(report) + + assert report["accuracy"]["status"] == "skipped" + assert report["contract_validation"]["input_shape"]["status"] == "passed" + assert report["deployment_signal"]["decision"] == "review" + assert "accuracy skipped reason" in markdown + + +def test_evaluation_report_blocks_contract_shape_mismatch(): + eval_result = DetectionEvalResult( + task="detection", + engine="onnxruntime", + device="cpu", + sample_count=1, + metrics={ + "map50": 0.0, + "map50_95": 0.0, + "f1_score": 0.0, + "precision": 0.0, + "recall": 0.0, + }, + notes=[], + model_input={"name": "images", "dtype": "float32", "shape": [1, 3, 640, 640]}, + actual_input_shape=[1, 3, 640, 640], + dataset={"image_dir": "images", "sample_count": 1}, + evaluation_config={"input_size": 640}, + extra={"accuracy_status": "skipped", "structural_validation": {"status": "passed", "issues": []}}, + ) + contract = parse_model_contract( + { + "preset": "yolov8_coco", + "task": "object_detection", + "input": {"shape": [1, 3, 320, 320]}, + "output": {"shape": [1, 84, 8400]}, + } + ) + + report = build_evaluation_report( + eval_result=eval_result, + model_contract=contract, + preset=get_preset("yolov8_coco").to_dict(), + ) + + assert report["contract_validation"]["input_shape"]["status"] == "mismatch" + assert report["deployment_signal"]["decision"] == "blocked"
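A minimal usage sketch of how the validation helpers introduced above compose, assuming the package is importable and the `examples/validation_demo/` fixtures added in this change are present at the repo root:

```python
# Sketch only: check the intentionally mismatched demo contract against the
# yolov8_coco preset defaults using the new validation modules.
from inferedgelab.validation.model_contract import build_default_contract, load_model_contract
from inferedgelab.validation.structural import validate_shape

contract = load_model_contract("examples/validation_demo/problem_model_contract.json")
preset_contract = build_default_contract(contract.preset)

# The problem fixture declares a 1x3x320x320 input, while the yolov8_coco preset expects 1x3x640x640.
check = validate_shape(contract.input.shape, preset_contract.input.shape)
print(contract.metadata["expected_issue"])
print(check["status"])  # expected: "mismatch"
```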