xPyD-hub · hlin99 · Apr 6, 2026 · Apr 6, 2026
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -1544,9 +1544,9 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
 - Programmatic `compare_backends()` API
 - ~25 new tests
 
-### M115 🔄 Workload Mix Optimizer
+### M115 ✅ Workload Mix Optimizer
 
-*In progress*
+*Completed — PR #254*
 
 - `WorkloadMixOptimizer` class in `workload_mix.py`
 - `WorkloadSpec`, `WorkloadAllocation`, `MixOptimizationResult` Pydantic models
@@ -1555,4 +1555,17 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
 - Support shared vs dedicated instance pools
 - CLI `workload-mix` subcommand with `--workload` (repeatable), `--total-gpus`, table + JSON output
 - Programmatic `optimize_workload_mix()` API
-- ~25 new tests
+- 32 new tests
+
+### M116 🔄 GPU Hour Calculator
+
+*In progress*
+
+- `GPUHourCalculator` class in `gpu_hours.py`
+- `TrafficProfile`, `HourlyTraffic`, `GPUHourReport`, `ScalingSavings`, `HourBreakdown` Pydantic models
+- 24-hour traffic profile input (hourly QPS values)
+- Map each hour's QPS to required instances using measured benchmark capacity
+- Auto-scaling savings estimation (fixed vs dynamic provisioning)
+- CLI `gpu-hours` subcommand with `--benchmark`, `--traffic-profile`, `--gpu-cost`, table + JSON output
+- Programmatic `calculate_gpu_hours()` API
+- ~24 new tests
diff --git a/docs/iterations/current.md b/docs/iterations/current.md
@@ -67,4 +67,5 @@ The project has completed **110 milestones**, covering the full feature chain fr
 | 6 | 2026-04-06 | M112 TensorRT-LLM Benchmark Format Importer | ✅ merged | PR #248, both bots approved |
 | 7 | 2026-04-06 | M113 TensorRT-LLM Benchmark Command Generator | ✅ merged | PR #250, both bots approved |
 | 8 | 2026-04-06 | M114 Multi-Backend Comparison Report | ✅ merged | PR #252, both bots approved |
-| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ⏳ pending review | Issue #253 |
+| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved |
+| 10 | 2026-04-06 | M116 GPU Hour Calculator | ⏳ pending review | Issue #255 |
diff --git a/src/xpyd_plan/__init__.py b/src/xpyd_plan/__init__.py
@@ -1535,3 +1535,22 @@
     "WorkloadSpec",
     "optimize_workload_mix",
 ]
+from xpyd_plan.gpu_hours import (  # noqa: E402
+    GPUHourCalculator,
+    GPUHourReport,
+    HourBreakdown,
+    HourlyTraffic,
+    ScalingSavings,
+    TrafficProfile,
+    calculate_gpu_hours,
+)
+
+__all__ += [
+    "GPUHourCalculator",
+    "GPUHourReport",
+    "HourBreakdown",
+    "HourlyTraffic",
+    "ScalingSavings",
+    "TrafficProfile",
+    "calculate_gpu_hours",
+]
diff --git a/src/xpyd_plan/cli/_gpu_hours.py b/src/xpyd_plan/cli/_gpu_hours.py
@@ -0,0 +1,159 @@
+"""CLI subcommand for GPU hour calculation."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from xpyd_plan.gpu_hours import (
+    GPUHourCalculator,
+    GPUHourReport,
+    HourlyTraffic,
+    TrafficProfile,
+)
+
+
+def register(subparsers: Any) -> None:
+    """Register the gpu-hours subcommand."""
+    p = subparsers.add_parser(
+        "gpu-hours",
+        help="Estimate GPU hours and costs from traffic profiles",
+        description=(
+            "Given benchmark data and a daily traffic profile (hourly QPS), "
+            "estimate total GPU hours, costs, and auto-scaling savings."
+        ),
+    )
+    p.add_argument(
+        "--benchmark",
+        required=True,
+        help="Benchmark JSON file",
+    )
+    p.add_argument(
+        "--traffic-profile",
+        required=True,
+        help="Traffic profile YAML file (hourly QPS schedule)",
+    )
+    p.add_argument(
+        "--gpu-cost",
+        type=float,
+        default=2.0,
+        help="GPU cost per instance per hour (default: 2.0)",
+    )
+    p.add_argument(
+        "--currency",
+        default="USD",
+        help="Currency label (default: USD)",
+    )
+    p.add_argument(
+        "--output-format",
+        choices=["table", "json"],
+        default="table",
+        help="Output format (default: table)",
+    )
+    p.set_defaults(func=_run)
+
+
+def _run(args: argparse.Namespace) -> None:
+    """Execute gpu-hours subcommand."""
+    from xpyd_plan.bench_adapter import load_benchmark_auto
+
+    data = load_benchmark_auto(Path(args.benchmark))
+
+    # Load traffic profile
+    profile_path = Path(args.traffic_profile)
+    with open(profile_path) as f:
+        profile_data = yaml.safe_load(f)
+
+    hours = [
+        HourlyTraffic(hour=h["hour"], qps=h["qps"]) for h in profile_data["hours"]
+    ]
+    profile = TrafficProfile(
+        hours=hours,
+        name=profile_data.get("name", profile_path.stem),
+    )
+
+    calc = GPUHourCalculator(data)
+    report = calc.calculate(
+        profile,
+        gpu_cost_per_hour=args.gpu_cost,
+        currency=args.currency,
+    )
+
+    if args.output_format == "json":
+        json.dump(report.model_dump(), sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_table(report)
+
+
+def _print_table(report: GPUHourReport) -> None:
+    """Print report as Rich table."""
+    from rich.console import Console
+    from rich.table import Table
+
+    console = Console()
+
+    # Summary
+    console.print(f"\n[bold]GPU Hour Report: {report.profile_name}[/bold]\n")
+
+    summary = Table(title="Summary")
+    summary.add_column("Metric", style="cyan")
+    summary.add_column("Value", justify="right")
+    summary.add_row("QPS per Instance", f"{report.qps_per_instance:.2f}")
+    summary.add_row("Peak QPS", f"{report.peak_qps:.1f}")
+    summary.add_row("Peak Instances", str(report.peak_instances))
+    summary.add_row("Off-Peak QPS", f"{report.off_peak_qps:.1f}")
+    summary.add_row("Off-Peak Instances", str(report.off_peak_instances))
+    summary.add_row("Avg Utilization", f"{report.avg_utilization:.1%}")
+    summary.add_row("Daily GPU Hours", f"{report.daily_gpu_hours:.1f}")
+    summary.add_row("Monthly GPU Hours", f"{report.monthly_gpu_hours:.1f}")
+    summary.add_row(
+        "Daily Cost", f"{report.daily_cost:.2f} {report.currency}"
+    )
+    summary.add_row(
+        "Monthly Cost", f"{report.monthly_cost:.2f} {report.currency}"
+    )
+    console.print(summary)
+
+    # Scaling savings
+    s = report.scaling_savings
+    savings = Table(title="Auto-Scaling Savings")
+    savings.add_column("Metric", style="cyan")
+    savings.add_column("Fixed", justify="right")
+    savings.add_column("Dynamic", justify="right")
+    savings.add_column("Saved", justify="right")
+    savings.add_row(
+        "Daily GPU Hours",
+        f"{s.fixed_daily_gpu_hours:.1f}",
+        f"{s.dynamic_daily_gpu_hours:.1f}",
+        f"{s.saved_gpu_hours:.1f} ({s.savings_percent:.1f}%)",
+    )
+    savings.add_row(
+        f"Daily Cost ({report.currency})",
+        f"{s.fixed_daily_cost:.2f}",
+        f"{s.dynamic_daily_cost:.2f}",
+        f"{s.saved_cost:.2f}",
+    )
+    console.print(savings)
+
+    # Hourly breakdown
+    hourly = Table(title="Hourly Breakdown")
+    hourly.add_column("Hour", justify="right")
+    hourly.add_column("QPS", justify="right")
+    hourly.add_column("Instances", justify="right")
+    hourly.add_column("GPU Hours", justify="right")
+    hourly.add_column(f"Cost ({report.currency})", justify="right")
+    for hb in report.hourly_breakdown:
+        hourly.add_row(
+            f"{hb.hour:02d}:00",
+            f"{hb.qps:.1f}",
+            str(hb.required_instances),
+            f"{hb.gpu_hours:.1f}",
+            f"{hb.cost:.2f}",
+        )
+    console.print(hourly)
diff --git a/src/xpyd_plan/cli/_main.py b/src/xpyd_plan/cli/_main.py
@@ -43,6 +43,7 @@
 from xpyd_plan.cli._forecast import add_forecast_parser
 from xpyd_plan.cli._generate import _cmd_generate
 from xpyd_plan.cli._goodput import add_goodput_parser
+from xpyd_plan.cli._gpu_hours import register as register_gpu_hours
 from xpyd_plan.cli._health_check import _cmd_health_check, add_health_check_parser
 from xpyd_plan.cli._heatmap import _cmd_heatmap, add_heatmap_parser
 from xpyd_plan.cli._import import add_import_parser
@@ -968,6 +969,7 @@ def main(argv: list[str] | None = None) -> None:
     register_sglang_commands(subparsers)
     register_trtllm_commands(subparsers)
     register_compare_backends(subparsers)
+    register_gpu_hours(subparsers)
     register_workload_mix(subparsers)
     add_rate_limit_parser(subparsers)
     add_batch_analysis_parser(subparsers)
@@ -1326,6 +1328,10 @@ def main(argv: list[str] | None = None) -> None:
         from xpyd_plan.cli._workload_mix import _run as _cmd_workload_mix
 
         _cmd_workload_mix(args)
+    elif args.command == "gpu-hours":
+        from xpyd_plan.cli._gpu_hours import _run as _cmd_gpu_hours
+
+        _cmd_gpu_hours(args)
     else:
         parser.print_help()
         sys.exit(1)