xPyD-hub · hlin99 · Apr 6, 2026 · Apr 6, 2026
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -1596,3 +1596,17 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
 - CLI `sla-risk` subcommand with `--benchmark`, `--sla-ttft`, `--sla-tpot`, `--sla-total`, table + JSON output
 - Programmatic `assess_sla_risk()` API
 - ~22 new tests
+
+### M119 🔄 Deployment Readiness Report
+
+*In progress*
+
+- `ReadinessAssessor` class in `readiness.py`
+- `ReadinessConfig`, `ReadinessCheck`, `ReadinessVerdict`, `ReadinessReport` Pydantic models
+- Unified go/no-go deployment readiness assessment combining: quality gate result, SLA risk score, SLA headroom safety level, cost efficiency ratio, and rate limit headroom
+- Per-check pass/warn/fail with configurable thresholds (risk score < 50, headroom > 10%, quality gate PASS, cost efficiency > 0.7, rate limit headroom > 15%)
+- Overall verdict: READY (all pass), CAUTION (any warn, no fail), NOT_READY (any fail)
+- Actionable deployment recommendations and blockers list
+- CLI `readiness` subcommand with `--benchmark`, `--sla-ttft`, `--sla-tpot`, `--sla-total`, `--cost-per-request`, `--optimal-cost`, `--measured-qps`, `--max-safe-qps`, `--output-format` (table + JSON output)
+- Programmatic `assess_readiness()` API
+- ~24 new tests
diff --git a/docs/iterations/current.md b/docs/iterations/current.md
@@ -69,5 +69,6 @@ The project has completed **110 milestones**, covering the full feature chain fr
 | 8 | 2026-04-06 | M114 Multi-Backend Comparison Report | ✅ merged | PR #252, both bots approved |
 | 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved |
 | 10 | 2026-04-06 | M116 GPU Hour Calculator | ✅ merged | PR #256, both bots approved |
-| 11 | 2026-04-06 | M117 Benchmark Quality Gate | ⏳ pending review | PR TBD |
-| 12 | 2026-04-06 | M118 SLA Risk Score | ⏳ pending review | PR TBD |
+| 11 | 2026-04-06 | M117 Benchmark Quality Gate | ✅ merged | PR #258 |
+| 12 | 2026-04-06 | M118 SLA Risk Score | ✅ merged | PR #260, both bots approved |
+| 13 | 2026-04-06 | M119 Deployment Readiness Report | ⏳ pending review | PR TBD |
diff --git a/src/xpyd_plan/cli/_main.py b/src/xpyd_plan/cli/_main.py
@@ -69,6 +69,7 @@
 from xpyd_plan.cli._ranking import _cmd_ranking, add_ranking_parser
 from xpyd_plan.cli._rate_limit import add_rate_limit_parser
 from xpyd_plan.cli._ratio_compare import add_ratio_compare_parser
+from xpyd_plan.cli._readiness import register as register_readiness
 from xpyd_plan.cli._recommend import _cmd_recommend
 from xpyd_plan.cli._regression import _cmd_regression, add_regression_parser
 from xpyd_plan.cli._replay import add_replay_parser
@@ -974,6 +975,7 @@ def main(argv: list[str] | None = None) -> None:
     register_gpu_hours(subparsers)
     register_quality_gate(subparsers)
     register_sla_risk(subparsers)
+    register_readiness(subparsers)
     register_workload_mix(subparsers)
     add_rate_limit_parser(subparsers)
     add_batch_analysis_parser(subparsers)
@@ -1344,6 +1346,10 @@ def main(argv: list[str] | None = None) -> None:
         from xpyd_plan.cli._sla_risk import _cmd_sla_risk
 
         _cmd_sla_risk(args)
+    elif args.command == "readiness":
+        from xpyd_plan.cli._readiness import _cmd_readiness
+
+        _cmd_readiness(args)
     else:
         parser.print_help()
         sys.exit(1)
diff --git a/src/xpyd_plan/cli/_readiness.py b/src/xpyd_plan/cli/_readiness.py
@@ -0,0 +1,146 @@
+"""CLI readiness command."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+
+from rich.console import Console
+from rich.table import Table
+
+from xpyd_plan.bench_adapter import load_benchmark_auto
+from xpyd_plan.readiness import ReadinessAssessor, ReadinessConfig
+
+
+def _cmd_readiness(args: argparse.Namespace) -> None:
+    """Handle the 'readiness' subcommand.
+
+    Loads the benchmark named by ``args.benchmark``, runs ``ReadinessAssessor``
+    with a default ``ReadinessConfig`` and prints the report, either as a Rich
+    table or, when ``args.output_format`` is ``"json"``, as JSON on stdout.
+
+    Raises:
+        SystemExit: status 1 when the verdict is ``not_ready``; this holds for
+            both output formats so scripts can gate on the exit code.
+    """
+    data = load_benchmark_auto(args.benchmark)
+    report = ReadinessAssessor(config=ReadinessConfig()).assess(
+        data,
+        sla_ttft_ms=args.sla_ttft,
+        sla_tpot_ms=args.sla_tpot,
+        sla_total_ms=args.sla_total,
+        cost_per_request=args.cost_per_request,
+        optimal_cost_per_request=args.optimal_cost,
+        measured_qps=args.measured_qps,
+        max_safe_qps=args.max_safe_qps,
+    )
+    not_ready = report.verdict.value == "not_ready"
+
+    if getattr(args, "output_format", "table") == "json":
+        # mode="json" makes Pydantic convert field values (e.g. enums) to
+        # JSON-safe primitives, so json.dump cannot trip over Python objects.
+        json.dump(report.model_dump(mode="json"), sys.stdout, indent=2)
+        sys.stdout.write("\n")
+        if not_ready:
+            sys.exit(1)
+        return
+
+    # Verdict banner
+    console = Console()
+    verdict_style = {
+        "ready": "[bold green]READY[/bold green]",
+        "caution": "[bold yellow]CAUTION[/bold yellow]",
+        "not_ready": "[bold red]NOT READY[/bold red]",
+    }
+    styled = verdict_style.get(report.verdict.value, report.verdict.value)
+    console.print(f"\nDeployment Readiness: {styled}")
+    console.print()
+
+    # Checks table
+    status_style = {
+        "pass": "[green]PASS[/green]",
+        "warn": "[yellow]WARN[/yellow]",
+        "fail": "[red]FAIL[/red]",
+    }
+    table = Table(title="Readiness Checks")
+    table.add_column("Check", justify="left")
+    table.add_column("Status", justify="center")
+    table.add_column("Value", justify="right")
+    table.add_column("Threshold", justify="left")
+    table.add_column("Detail", justify="left")
+    for c in report.checks:
+        status = status_style.get(c.status.value, c.status.value)
+        table.add_row(c.name, status, c.value, c.threshold, c.detail)
+    console.print(table)
+    console.print()
+
+    if report.blockers:
+        console.print(f"[bold red]Blockers:[/bold red] {', '.join(report.blockers)}")
+    if report.warnings:
+        console.print(f"[yellow]Warnings:[/yellow] {', '.join(report.warnings)}")
+    console.print(f"\n[bold]{report.recommendation}[/bold]")
+
+    if not_ready:
+        sys.exit(1)
+
+
+def register(subparsers: argparse._SubParsersAction) -> None:
+    """Register the readiness subcommand.
+
+    Args:
+        subparsers: Sub-parser action of the top-level CLI parser; a
+            ``readiness`` parser is added to it.
+
+    Options defined on the new parser, in the order they are added:
+
+    * ``--benchmark`` (required): path to benchmark JSON file.
+    * ``--sla-ttft``: TTFT SLA threshold in ms.
+    * ``--sla-tpot``: TPOT SLA threshold in ms.
+    * ``--sla-total``: total latency SLA threshold in ms.
+    * ``--cost-per-request``: actual cost per request.
+    * ``--optimal-cost``: optimal cost per request baseline.
+    * ``--measured-qps``: current measured QPS.
+    * ``--max-safe-qps``: maximum safe QPS from rate limit analysis.
+    * ``--output-format``: ``table`` (default) or ``json``.
+
+    The numeric options are parsed as ``float`` and default to ``None``.
+    """
+    # The sub-command itself, with the one-line summary for the parent help.
+    readiness = subparsers.add_parser(
+        "readiness",
+        help="Unified deployment readiness assessment (go/no-go)",
+    )
+    # The benchmark path is the only mandatory option.
+    readiness.add_argument(
+        "--benchmark",
+        required=True,
+        help="Path to benchmark JSON file",
+    )
+
+    # The numeric options all share one shape, so declare them as data and
+    # add them in a loop; tuple order is the order argparse registers them.
+    float_options = (
+        ("--sla-ttft", "TTFT SLA threshold in ms"),
+        ("--sla-tpot", "TPOT SLA threshold in ms"),
+        ("--sla-total", "Total latency SLA threshold in ms"),
+        ("--cost-per-request", "Actual cost per request"),
+        ("--optimal-cost", "Optimal cost per request baseline"),
+        ("--measured-qps", "Current measured QPS"),
+        ("--max-safe-qps", "Maximum safe QPS from rate limit analysis"),
+    )
+    for flag, description in float_options:
+        readiness.add_argument(
+            flag,
+            type=float,
+            default=None,
+            help=description,
+        )
+
+    # Output selector, restricted to the two supported renderings.
+    readiness.add_argument(
+        "--output-format",
+        choices=["table", "json"],
+        default="table",
+        help="Output format (default: table)",
+    )