Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/iterations/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,4 @@ The project has completed **110 milestones**, covering the full feature chain fr
| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved |
| 10 | 2026-04-06 | M116 GPU Hour Calculator | ✅ merged | PR #256, both bots approved |
| 11 | 2026-04-06 | M117 Benchmark Quality Gate | ⏳ pending review | PR TBD |
| 12 | 2026-04-06 | M118 SLA Risk Score | ⏳ pending review | PR TBD |
18 changes: 18 additions & 0 deletions src/xpyd_plan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,3 +1573,21 @@
"evaluate_quality_gate",
"load_gate_config",
]

from xpyd_plan.sla_risk import ( # noqa: E402
RiskFactor,
RiskLevel,
RiskScore,
SLARiskReport,
SLARiskScorer,
assess_sla_risk,
)

# Re-export the SLA risk names imported from xpyd_plan.sla_risk above so they
# are part of the package's declared public API.
__all__ += [
"RiskFactor",
"RiskLevel",
"RiskScore",
"SLARiskReport",
"SLARiskScorer",
"assess_sla_risk",
]
6 changes: 6 additions & 0 deletions src/xpyd_plan/cli/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from xpyd_plan.cli._sglang_commands import register_sglang_commands
from xpyd_plan.cli._size_distribution import _cmd_size_distribution, add_size_distribution_parser
from xpyd_plan.cli._sla_headroom import add_sla_headroom_parser
from xpyd_plan.cli._sla_risk import register as register_sla_risk
from xpyd_plan.cli._sla_tier import add_sla_tier_parser
from xpyd_plan.cli._spike import add_spike_parser
from xpyd_plan.cli._sqlite_export import add_sqlite_export_parser
Expand Down Expand Up @@ -972,6 +973,7 @@ def main(argv: list[str] | None = None) -> None:
register_compare_backends(subparsers)
register_gpu_hours(subparsers)
register_quality_gate(subparsers)
register_sla_risk(subparsers)
register_workload_mix(subparsers)
add_rate_limit_parser(subparsers)
add_batch_analysis_parser(subparsers)
Expand Down Expand Up @@ -1338,6 +1340,10 @@ def main(argv: list[str] | None = None) -> None:
from xpyd_plan.cli._quality_gate import _run as _cmd_quality_gate

_cmd_quality_gate(args)
elif args.command == "sla-risk":
from xpyd_plan.cli._sla_risk import _cmd_sla_risk

_cmd_sla_risk(args)
else:
parser.print_help()
sys.exit(1)
106 changes: 106 additions & 0 deletions src/xpyd_plan/cli/_sla_risk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""CLI sla-risk command."""

from __future__ import annotations

import argparse
import json
import sys

from rich.console import Console
from rich.table import Table

from xpyd_plan.bench_adapter import load_benchmark_auto
from xpyd_plan.sla_risk import SLARiskScorer


def _cmd_sla_risk(args: argparse.Namespace) -> None:
    """Handle the 'sla-risk' subcommand.

    Loads the benchmark named by ``args.benchmark``, scores it with
    ``SLARiskScorer.assess`` against the optional TTFT / TPOT / total-latency
    thresholds, and writes the resulting report to stdout, either as JSON or
    as a Rich summary line plus a per-factor table.

    Args:
        args: Parsed CLI namespace. Reads ``benchmark``, ``sla_ttft``,
            ``sla_tpot`` and ``sla_total``; ``output_format`` is optional and
            falls back to ``"table"`` when the attribute is absent.
    """
    data = load_benchmark_auto(args.benchmark)
    scorer = SLARiskScorer()
    report = scorer.assess(
        data,
        sla_ttft_ms=args.sla_ttft,
        sla_tpot_ms=args.sla_tpot,
        sla_total_ms=args.sla_total,
    )

    output_format = getattr(args, "output_format", "table")
    if output_format == "json":
        # mode="json" asks the model for JSON-native values only, so enum
        # members such as risk_level are emitted as their values instead of
        # being handed to json.dump as objects it may not be able to encode.
        # NOTE(review): assumes the report is a Pydantic v2 model (it exposes
        # model_dump) — confirm against xpyd_plan.sla_risk.
        json.dump(report.model_dump(mode="json"), sys.stdout, indent=2)
        sys.stdout.write("\n")
        return

    # The console is only needed for the human-readable rendering below.
    console = Console()

    # Risk score summary: map each level value to its Rich-styled label,
    # falling back to the raw value for any level not listed here.
    level_style = {
        "low": "[bold green]LOW[/bold green]",
        "moderate": "[yellow]MODERATE[/yellow]",
        "high": "[bold yellow]HIGH[/bold yellow]",
        "critical": "[bold red]CRITICAL[/bold red]",
    }
    level_val = report.risk_score.risk_level.value
    styled = level_style.get(level_val, level_val)
    console.print(
        f"\nSLA Risk Score: [bold]{report.risk_score.total_score:.1f}"
        f"[/bold] / 100 — {styled}"
    )
    console.print()

    # Factor table: one row per risk factor contributing to the score.
    table = Table(title="Risk Factor Breakdown")
    table.add_column("Factor", justify="left")
    table.add_column("Score", justify="right")
    table.add_column("Weight", justify="right")
    table.add_column("Weighted", justify="right")
    table.add_column("Detail", justify="left")

    for factor in report.factors:
        table.add_row(
            factor.name,
            f"{factor.score:.1f}",
            f"{factor.weight:.0%}",  # weight is rendered as a percentage
            f"{factor.weighted_score:.1f}",
            factor.detail,
        )

    console.print(table)
    console.print()
    console.print(f"[bold]{report.recommendation}[/bold]")


def register(subparsers: argparse._SubParsersAction) -> None:
    """Add the ``sla-risk`` subcommand and its options to *subparsers*.

    Options registered: a required ``--benchmark`` path, three optional float
    SLA thresholds (``--sla-ttft``, ``--sla-tpot``, ``--sla-total``) that
    default to ``None``, and ``--output-format`` (``table`` or ``json``,
    default ``table``).
    """
    sla_risk = subparsers.add_parser(
        "sla-risk",
        help="Composite SLA risk score combining headroom, tail, jitter, convergence, burn rate",
    )

    sla_risk.add_argument(
        "--benchmark", required=True, help="Path to benchmark JSON file"
    )

    # The threshold options differ only in flag name and help text, so they
    # are declared as data and registered in this order.
    threshold_options = (
        ("--sla-ttft", "TTFT SLA threshold in ms"),
        ("--sla-tpot", "TPOT SLA threshold in ms"),
        ("--sla-total", "Total latency SLA threshold in ms"),
    )
    for flag, description in threshold_options:
        sla_risk.add_argument(flag, type=float, default=None, help=description)

    sla_risk.add_argument(
        "--output-format",
        default="table",
        choices=["table", "json"],
        help="Output format (default: table)",
    )
Loading
Loading