Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -1596,3 +1596,17 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
- CLI `sla-risk` subcommand with `--benchmark`, `--sla-ttft`, `--sla-tpot`, `--sla-total`, table + JSON output
- Programmatic `assess_sla_risk()` API
- ~22 new tests

### M119 🔄 Deployment Readiness Report

*In progress*

- `ReadinessAssessor` class in `readiness.py`
- `ReadinessConfig`, `ReadinessCheck`, `ReadinessVerdict`, `ReadinessReport` Pydantic models
- Unified go/no-go deployment readiness assessment combining: quality gate result, SLA risk score, SLA headroom safety level, cost efficiency ratio, and rate limit headroom
- Per-check pass/warn/fail with configurable thresholds (risk score < 50, headroom > 10%, quality gate PASS, cost efficiency > 0.7, rate limit headroom > 15%)
- Overall verdict: READY (all pass), CAUTION (any warn, no fail), NOT_READY (any fail)
- Actionable deployment recommendations and blockers list
- CLI `readiness` subcommand with `--benchmark`, `--sla-ttft`, `--sla-tpot`, `--sla-total`, `--cost-model`, table + JSON output
- Programmatic `assess_readiness()` API
- ~24 new tests
5 changes: 3 additions & 2 deletions docs/iterations/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ The project has completed **110 milestones**, covering the full feature chain fr
| 8 | 2026-04-06 | M114 Multi-Backend Comparison Report | ✅ merged | PR #252, both bots approved |
| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved |
| 10 | 2026-04-06 | M116 GPU Hour Calculator | ✅ merged | PR #256, both bots approved |
| 11 | 2026-04-06 | M117 Benchmark Quality Gate | ⏳ pending review | PR TBD |
| 12 | 2026-04-06 | M118 SLA Risk Score | ⏳ pending review | PR TBD |
| 11 | 2026-04-06 | M117 Benchmark Quality Gate | ✅ merged | PR #258 |
| 12 | 2026-04-06 | M118 SLA Risk Score | ✅ merged | PR #260, both bots approved |
| 13 | 2026-04-06 | M119 Deployment Readiness Report | ⏳ pending review | PR TBD |
6 changes: 6 additions & 0 deletions src/xpyd_plan/cli/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
from xpyd_plan.cli._ranking import _cmd_ranking, add_ranking_parser
from xpyd_plan.cli._rate_limit import add_rate_limit_parser
from xpyd_plan.cli._ratio_compare import add_ratio_compare_parser
from xpyd_plan.cli._readiness import register as register_readiness
from xpyd_plan.cli._recommend import _cmd_recommend
from xpyd_plan.cli._regression import _cmd_regression, add_regression_parser
from xpyd_plan.cli._replay import add_replay_parser
Expand Down Expand Up @@ -974,6 +975,7 @@ def main(argv: list[str] | None = None) -> None:
register_gpu_hours(subparsers)
register_quality_gate(subparsers)
register_sla_risk(subparsers)
register_readiness(subparsers)
register_workload_mix(subparsers)
add_rate_limit_parser(subparsers)
add_batch_analysis_parser(subparsers)
Expand Down Expand Up @@ -1344,6 +1346,10 @@ def main(argv: list[str] | None = None) -> None:
from xpyd_plan.cli._sla_risk import _cmd_sla_risk

_cmd_sla_risk(args)
elif args.command == "readiness":
from xpyd_plan.cli._readiness import _cmd_readiness

_cmd_readiness(args)
else:
parser.print_help()
sys.exit(1)
146 changes: 146 additions & 0 deletions src/xpyd_plan/cli/_readiness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""CLI readiness command."""

from __future__ import annotations

import argparse
import json
import sys

from rich.console import Console
from rich.table import Table

from xpyd_plan.bench_adapter import load_benchmark_auto
from xpyd_plan.readiness import ReadinessAssessor, ReadinessConfig


def _cmd_readiness(args: argparse.Namespace) -> None:
    """Handle the 'readiness' subcommand.

    Loads the benchmark named by ``args.benchmark``, runs a
    ``ReadinessAssessor`` built from a default ``ReadinessConfig`` and prints
    the resulting report, either as JSON on stdout or as a Rich table
    followed by blockers, warnings and the recommendation.

    Args:
        args: Parsed CLI namespace. Reads ``benchmark``, ``sla_ttft``,
            ``sla_tpot``, ``sla_total``, ``cost_per_request``,
            ``optimal_cost``, ``measured_qps`` and ``max_safe_qps``;
            ``output_format`` is optional and falls back to ``"table"``.

    Raises:
        SystemExit: With status 1 when the report verdict is ``not_ready``.
            This applies to both output formats so that the exit status is
            a reliable go/no-go signal for scripts consuming the JSON.
    """
    data = load_benchmark_auto(args.benchmark)

    config = ReadinessConfig()
    assessor = ReadinessAssessor(config=config)
    report = assessor.assess(
        data,
        sla_ttft_ms=args.sla_ttft,
        sla_tpot_ms=args.sla_tpot,
        sla_total_ms=args.sla_total,
        cost_per_request=args.cost_per_request,
        optimal_cost_per_request=args.optimal_cost,
        measured_qps=args.measured_qps,
        max_safe_qps=args.max_safe_qps,
    )

    output_format = getattr(args, "output_format", "table")
    if output_format == "json":
        # mode="json" asks Pydantic for JSON-compatible values only, so the
        # dump cannot trip json.dump on enum members or other rich types.
        json.dump(report.model_dump(mode="json"), sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        console = Console()

        # Verdict banner
        verdict_style = {
            "ready": "[bold green]READY[/bold green]",
            "caution": "[bold yellow]CAUTION[/bold yellow]",
            "not_ready": "[bold red]NOT READY[/bold red]",
        }
        # Unknown verdict values are printed verbatim rather than dropped.
        styled = verdict_style.get(report.verdict.value, report.verdict.value)
        console.print(f"\nDeployment Readiness: {styled}")
        console.print()

        # Checks table
        table = Table(title="Readiness Checks")
        table.add_column("Check", justify="left")
        table.add_column("Status", justify="center")
        table.add_column("Value", justify="right")
        table.add_column("Threshold", justify="left")
        table.add_column("Detail", justify="left")

        status_style = {
            "pass": "[green]PASS[/green]",
            "warn": "[yellow]WARN[/yellow]",
            "fail": "[red]FAIL[/red]",
        }

        for c in report.checks:
            table.add_row(
                c.name,
                status_style.get(c.status.value, c.status.value),
                c.value,
                c.threshold,
                c.detail,
            )

        console.print(table)
        console.print()

        if report.blockers:
            console.print(
                f"[bold red]Blockers:[/bold red] {', '.join(report.blockers)}"
            )
        if report.warnings:
            console.print(
                f"[yellow]Warnings:[/yellow] {', '.join(report.warnings)}"
            )

        console.print(f"\n[bold]{report.recommendation}[/bold]")

    # Checked after either output path: previously the JSON branch returned
    # early and always exited 0, even for a NOT READY verdict.
    if report.verdict.value == "not_ready":
        sys.exit(1)


def register(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``readiness`` subcommand and its options to *subparsers*."""
    readiness = subparsers.add_parser(
        "readiness",
        help="Unified deployment readiness assessment (go/no-go)",
    )

    # The benchmark file is the only mandatory input.
    readiness.add_argument(
        "--benchmark",
        required=True,
        help="Path to benchmark JSON file",
    )

    # All numeric inputs share one shape: an optional float defaulting to
    # None. They are registered in this order so --help lists them the same
    # way as before.
    optional_floats = (
        ("--sla-ttft", "TTFT SLA threshold in ms"),
        ("--sla-tpot", "TPOT SLA threshold in ms"),
        ("--sla-total", "Total latency SLA threshold in ms"),
        ("--cost-per-request", "Actual cost per request"),
        ("--optimal-cost", "Optimal cost per request baseline"),
        ("--measured-qps", "Current measured QPS"),
        ("--max-safe-qps", "Maximum safe QPS from rate limit analysis"),
    )
    for flag, description in optional_floats:
        readiness.add_argument(
            flag,
            type=float,
            default=None,
            help=description,
        )

    readiness.add_argument(
        "--output-format",
        choices=["table", "json"],
        default="table",
        help="Output format (default: table)",
    )
Loading
Loading