From 0590a60ae26e49b3881506adab6eb63a0a5da220 Mon Sep 17 00:00:00 2001
From: hlin99 <tony.lin@intel.com>
Date: Mon, 6 Apr 2026 14:08:17 +0800
Subject: [PATCH] feat: GPU Hour Calculator (M116)

- GPUHourCalculator class in gpu_hours.py
- TrafficProfile, HourlyTraffic, GPUHourReport, ScalingSavings, HourBreakdown models
- 24-hour traffic profile with hourly QPS mapping to required instances
- Auto-scaling savings estimation (fixed vs dynamic provisioning)
- CLI gpu-hours subcommand with --benchmark, --traffic-profile, --gpu-cost
- Programmatic calculate_gpu_hours() API
- 24 new tests

Closes #255
---
 ROADMAP.md                      |  19 +-
 docs/iterations/current.md      |   3 +-
 src/xpyd_plan/__init__.py       |  19 ++
 src/xpyd_plan/cli/_gpu_hours.py | 159 ++++++++++++++++
 src/xpyd_plan/cli/_main.py      |   6 +
 src/xpyd_plan/gpu_hours.py      | 228 +++++++++++++++++++++++
 tests/test_gpu_hours.py         | 317 ++++++++++++++++++++++++++++++++
 7 files changed, 747 insertions(+), 4 deletions(-)
 create mode 100644 src/xpyd_plan/cli/_gpu_hours.py
 create mode 100644 src/xpyd_plan/gpu_hours.py
 create mode 100644 tests/test_gpu_hours.py

diff --git a/ROADMAP.md b/ROADMAP.md
index d2db68a..e67b3a9 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1544,9 +1544,9 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
 - Programmatic `compare_backends()` API
 - ~25 new tests
 
-### M115 🔄 Workload Mix Optimizer
+### M115 ✅ Workload Mix Optimizer
 
-*In progress*
+*Completed — PR #254*
 
 - `WorkloadMixOptimizer` class in `workload_mix.py`
 - `WorkloadSpec`, `WorkloadAllocation`, `MixOptimizationResult` Pydantic models
@@ -1555,4 +1555,17 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be
 - Support shared vs dedicated instance pools
 - CLI `workload-mix` subcommand with `--workload` (repeatable), `--total-gpus`, table + JSON output
 - Programmatic `optimize_workload_mix()` API
-- ~25 new tests
+- 32 new tests
+
+### M116 🔄 GPU Hour Calculator
+
+*In progress*
+
+- `GPUHourCalculator` class in `gpu_hours.py`
+- `TrafficProfile`, `HourlyTraffic`, `GPUHourReport`, `ScalingSavings`, `HourBreakdown` Pydantic models
+- 24-hour traffic profile input (hourly QPS values)
+- Map each hour's QPS to required instances using measured benchmark capacity
+- Auto-scaling savings estimation (fixed vs dynamic provisioning)
+- CLI `gpu-hours` subcommand with `--benchmark`, `--traffic-profile`, `--gpu-cost`, table + JSON output
+- Programmatic `calculate_gpu_hours()` API
+- ~24 new tests
diff --git a/docs/iterations/current.md b/docs/iterations/current.md
index b9b57d2..c3d1dd0 100644
--- a/docs/iterations/current.md
+++ b/docs/iterations/current.md
@@ -67,4 +67,5 @@ The project has completed **110 milestones**, covering the full feature chain fr
 | 6 | 2026-04-06 | M112 TensorRT-LLM Benchmark Format Importer | ✅ merged | PR #248, both bots approved |
 | 7 | 2026-04-06 | M113 TensorRT-LLM Benchmark Command Generator | ✅ merged | PR #250, both bots approved |
 | 8 | 2026-04-06 | M114 Multi-Backend Comparison Report | ✅ merged | PR #252, both bots approved |
-| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ⏳ pending review | Issue #253 |
+| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved |
+| 10 | 2026-04-06 | M116 GPU Hour Calculator | ⏳ pending review | Issue #255 |
diff --git a/src/xpyd_plan/__init__.py b/src/xpyd_plan/__init__.py
index f15d9eb..96d3f46 100644
--- a/src/xpyd_plan/__init__.py
+++ b/src/xpyd_plan/__init__.py
@@ -1535,3 +1535,22 @@
     "WorkloadSpec",
     "optimize_workload_mix",
 ]
+from xpyd_plan.gpu_hours import (  # noqa: E402
+    GPUHourCalculator,
+    GPUHourReport,
+    HourBreakdown,
+    HourlyTraffic,
+    ScalingSavings,
+    TrafficProfile,
+    calculate_gpu_hours,
+)
+
+__all__ += [
+    "GPUHourCalculator",
+    "GPUHourReport",
+    "HourBreakdown",
+    "HourlyTraffic",
+    "ScalingSavings",
+    "TrafficProfile",
+    "calculate_gpu_hours",
+]
diff --git a/src/xpyd_plan/cli/_gpu_hours.py b/src/xpyd_plan/cli/_gpu_hours.py
new file mode 100644
index 0000000..fa35436
--- /dev/null
+++ b/src/xpyd_plan/cli/_gpu_hours.py
@@ -0,0 +1,159 @@
+"""CLI subcommand for GPU hour calculation."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from xpyd_plan.gpu_hours import (
+    GPUHourCalculator,
+    GPUHourReport,
+    HourlyTraffic,
+    TrafficProfile,
+)
+
+
+def register(subparsers: Any) -> None:
+    """Add the ``gpu-hours`` subcommand and its options to ``subparsers``."""
+    p = subparsers.add_parser(
+        "gpu-hours",
+        help="Estimate GPU hours and costs from traffic profiles",
+        description=(
+            "Given benchmark data and a daily traffic profile (hourly QPS), "
+            "estimate total GPU hours, costs, and auto-scaling savings."
+        ),
+    )
+    p.add_argument(
+        "--benchmark",
+        required=True,
+        help="Benchmark JSON file",
+    )
+    p.add_argument(
+        "--traffic-profile",
+        required=True,
+        help="Traffic profile YAML file (hourly QPS schedule)",
+    )
+    p.add_argument(
+        "--gpu-cost",
+        type=float,
+        default=2.0,
+        help="GPU cost per instance per hour (default: 2.0)",
+    )
+    p.add_argument(
+        "--currency",
+        default="USD",
+        help="Currency label (default: USD)",
+    )
+    p.add_argument(
+        "--output-format",
+        choices=["table", "json"],
+        default="table",
+        help="Output format (default: table)",
+    )
+    p.set_defaults(func=_run)  # stored on the parsed namespace as ``args.func``
+
+
+def _run(args: argparse.Namespace) -> None:
+    """Load the benchmark and traffic profile, then print the GPU hour report.
+
+    Exits with an error message when the profile YAML is not a mapping with an
+    ``hours`` list; per-entry problems surface as pydantic validation errors.
+    """
+    from xpyd_plan.bench_adapter import load_benchmark_auto
+
+    data = load_benchmark_auto(Path(args.benchmark))
+
+    # Decode as UTF-8 explicitly so parsing does not depend on the platform locale.
+    profile_path = Path(args.traffic_profile)
+    with open(profile_path, encoding="utf-8") as f:
+        profile_data = yaml.safe_load(f)
+    # safe_load returns None for an empty file and may return a list or scalar.
+    if not isinstance(profile_data, dict) or not isinstance(profile_data.get("hours"), list):
+        sys.exit(f"{profile_path}: traffic profile must be a mapping with an 'hours' list")
+
+    hours = [HourlyTraffic(hour=h["hour"], qps=h["qps"]) for h in profile_data["hours"]]
+    profile = TrafficProfile(hours=hours, name=profile_data.get("name", profile_path.stem))
+
+    report = GPUHourCalculator(data).calculate(
+        profile,
+        gpu_cost_per_hour=args.gpu_cost,
+        currency=args.currency,
+    )
+    if args.output_format == "json":
+        json.dump(report.model_dump(), sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_table(report)
+
+
+def _print_table(report: GPUHourReport) -> None:
+    """Render ``report`` to the terminal as three Rich tables.
+
+    Output order: a bold title line, the summary table, the auto-scaling savings
+    table, and finally one row per entry in ``report.hourly_breakdown``.
+    """
+    from rich.console import Console
+    from rich.table import Table
+
+    console = Console()
+    unit = report.currency
+    console.print(f"\n[bold]GPU Hour Report: {report.profile_name}[/bold]\n")
+
+    # Summary: (metric label, formatted value) pairs in display order.
+    summary_rows = [
+        ("QPS per Instance", f"{report.qps_per_instance:.2f}"),
+        ("Peak QPS", f"{report.peak_qps:.1f}"),
+        ("Peak Instances", str(report.peak_instances)),
+        ("Off-Peak QPS", f"{report.off_peak_qps:.1f}"),
+        ("Off-Peak Instances", str(report.off_peak_instances)),
+        ("Avg Utilization", f"{report.avg_utilization:.1%}"),
+        ("Daily GPU Hours", f"{report.daily_gpu_hours:.1f}"),
+        ("Monthly GPU Hours", f"{report.monthly_gpu_hours:.1f}"),
+        ("Daily Cost", f"{report.daily_cost:.2f} {unit}"),
+        ("Monthly Cost", f"{report.monthly_cost:.2f} {unit}"),
+    ]
+    summary_table = Table(title="Summary")
+    summary_table.add_column("Metric", style="cyan")
+    summary_table.add_column("Value", justify="right")
+    for label, value in summary_rows:
+        summary_table.add_row(label, value)
+    console.print(summary_table)
+
+    # Fixed versus dynamic provisioning, one row for GPU hours and one for cost.
+    sav = report.scaling_savings
+    savings_table = Table(title="Auto-Scaling Savings")
+    savings_table.add_column("Metric", style="cyan")
+    for heading in ("Fixed", "Dynamic", "Saved"):
+        savings_table.add_column(heading, justify="right")
+    savings_table.add_row(
+        "Daily GPU Hours",
+        f"{sav.fixed_daily_gpu_hours:.1f}",
+        f"{sav.dynamic_daily_gpu_hours:.1f}",
+        f"{sav.saved_gpu_hours:.1f} ({sav.savings_percent:.1f}%)",
+    )
+    savings_table.add_row(
+        f"Daily Cost ({unit})",
+        f"{sav.fixed_daily_cost:.2f}",
+        f"{sav.dynamic_daily_cost:.2f}",
+        f"{sav.saved_cost:.2f}",
+    )
+    console.print(savings_table)
+
+    # Per-hour detail; every column is right-aligned.
+    hourly_table = Table(title="Hourly Breakdown")
+    for heading in ("Hour", "QPS", "Instances", "GPU Hours", f"Cost ({unit})"):
+        hourly_table.add_column(heading, justify="right")
+    for entry in report.hourly_breakdown:
+        hourly_table.add_row(
+            f"{entry.hour:02d}:00",
+            f"{entry.qps:.1f}",
+            str(entry.required_instances),
+            f"{entry.gpu_hours:.1f}",
+            f"{entry.cost:.2f}",
+        )
+    console.print(hourly_table)
diff --git a/src/xpyd_plan/cli/_main.py b/src/xpyd_plan/cli/_main.py
index cd4942d..3dd6226 100644
--- a/src/xpyd_plan/cli/_main.py
+++ b/src/xpyd_plan/cli/_main.py
@@ -43,6 +43,7 @@
 from xpyd_plan.cli._forecast import add_forecast_parser
 from xpyd_plan.cli._generate import _cmd_generate
 from xpyd_plan.cli._goodput import add_goodput_parser
+from xpyd_plan.cli._gpu_hours import register as register_gpu_hours
 from xpyd_plan.cli._health_check import _cmd_health_check, add_health_check_parser
 from xpyd_plan.cli._heatmap import _cmd_heatmap, add_heatmap_parser
 from xpyd_plan.cli._import import add_import_parser
@@ -968,6 +969,7 @@ def main(argv: list[str] | None = None) -> None:
     register_sglang_commands(subparsers)
     register_trtllm_commands(subparsers)
     register_compare_backends(subparsers)
+    register_gpu_hours(subparsers)
     register_workload_mix(subparsers)
     add_rate_limit_parser(subparsers)
     add_batch_analysis_parser(subparsers)
@@ -1326,6 +1328,10 @@ def main(argv: list[str] | None = None) -> None:
         from xpyd_plan.cli._workload_mix import _run as _cmd_workload_mix
 
         _cmd_workload_mix(args)
+    elif args.command == "gpu-hours":
+        from xpyd_plan.cli._gpu_hours import _run as _cmd_gpu_hours
+
+        _cmd_gpu_hours(args)
     else:
         parser.print_help()
         sys.exit(1)
diff --git a/src/xpyd_plan/gpu_hours.py b/src/xpyd_plan/gpu_hours.py
new file mode 100644
index 0000000..b95ae65
--- /dev/null
+++ b/src/xpyd_plan/gpu_hours.py
@@ -0,0 +1,228 @@
+"""GPU Hour Calculator — estimate GPU hours and costs from traffic profiles."""
+
+from __future__ import annotations
+
+import math
+from typing import List
+
+from pydantic import BaseModel, Field, field_validator
+
+from xpyd_plan.benchmark_models import BenchmarkData
+
+
+class HourlyTraffic(BaseModel):
+    """Traffic for one hour of the day; QPS must be a finite, non-negative number."""
+
+    hour: int = Field(..., ge=0, le=23, description="Hour of day (0-23)")
+    qps: float = Field(..., ge=0, allow_inf_nan=False, description="Expected QPS during this hour")
+
+
+class TrafficProfile(BaseModel):
+    """Traffic profile of 1 to 24 hourly entries with unique hours, kept sorted by hour."""
+
+    hours: List[HourlyTraffic] = Field(
+        ..., min_length=1, max_length=24, description="Hourly traffic specs"
+    )
+    name: str = Field(default="default", description="Profile name")
+
+    @field_validator("hours")
+    @classmethod
+    def validate_unique_hours(cls, v: list[HourlyTraffic]) -> list[HourlyTraffic]:
+        """Raise ``ValueError`` on a repeated hour; return the entries sorted by hour."""
+        seen = set()  # hour values encountered so far
+        for ht in v:
+            if ht.hour in seen:
+                raise ValueError(f"Duplicate hour: {ht.hour}")
+            seen.add(ht.hour)
+        return sorted(v, key=lambda x: x.hour)
+
+
+class HourBreakdown(BaseModel):
+    """Traffic, required instances, GPU hours and cost for a single hour of the day."""
+
+    hour: int = Field(..., description="Hour of day")
+    qps: float = Field(..., description="Traffic QPS")
+    required_instances: int = Field(..., description="Instances needed")
+    gpu_hours: float = Field(..., description="GPU hours consumed")  # 1 per required instance
+    cost: float = Field(..., description="Cost for this hour")
+
+
+class ScalingSavings(BaseModel):
+    """Daily GPU hours and cost under fixed provisioning versus auto-scaling."""
+
+    fixed_daily_gpu_hours: float = Field(..., description="GPU hours with fixed instances")
+    dynamic_daily_gpu_hours: float = Field(
+        ..., description="GPU hours with auto-scaling"
+    )
+    saved_gpu_hours: float = Field(..., description="GPU hours saved per day")
+    savings_percent: float = Field(..., description="Percentage savings")  # 0-100, not 0-1
+    fixed_daily_cost: float = Field(..., description="Daily cost with fixed instances")
+    dynamic_daily_cost: float = Field(
+        ..., description="Daily cost with auto-scaling"
+    )
+    saved_cost: float = Field(..., description="Cost saved per day")
+
+
+class GPUHourReport(BaseModel):
+    """Result of one GPU hour calculation: totals, peak/off-peak, savings, hourly rows."""
+
+    profile_name: str = Field(..., description="Traffic profile name")
+    gpu_cost_per_hour: float = Field(..., description="GPU hourly rate")
+    currency: str = Field(default="USD", description="Currency")  # label only
+    peak_qps: float = Field(..., description="Peak traffic QPS")
+    peak_instances: int = Field(..., description="Instances needed at peak")
+    off_peak_qps: float = Field(..., description="Minimum traffic QPS")
+    off_peak_instances: int = Field(..., description="Instances needed at off-peak")
+    daily_gpu_hours: float = Field(..., description="Total GPU hours per day")
+    monthly_gpu_hours: float = Field(..., description="Total GPU hours per month (30d)")
+    daily_cost: float = Field(..., description="Total cost per day")
+    monthly_cost: float = Field(..., description="Total cost per month (30d)")
+    avg_utilization: float = Field(
+        ..., description="Average utilization (dynamic/peak)"
+    )
+    hourly_breakdown: List[HourBreakdown] = Field(
+        ..., description="Per-hour breakdown"
+    )
+    scaling_savings: ScalingSavings = Field(
+        ..., description="Auto-scaling savings analysis"
+    )
+    qps_per_instance: float = Field(
+        ..., description="Estimated QPS capacity per instance"
+    )
+
+
+class GPUHourCalculator:
+    """Calculate GPU hours and costs from benchmark data and traffic profiles.
+
+    Per-instance capacity is derived once from the benchmark metadata
+    (``measured_qps`` divided by prefill + decode instances) and reused for
+    every ``calculate`` call.
+    """
+
+    def __init__(self, data: BenchmarkData) -> None:
+        self._data = data
+        self._qps_per_instance = self._estimate_qps_per_instance()
+
+    def _estimate_qps_per_instance(self) -> float:
+        """Return measured QPS per instance, or 1.0 when it cannot be derived.
+
+        The 1.0 fallback applies when the instance count or the measured QPS
+        is not positive, which keeps later divisions well-defined.
+        """
+        meta = self._data.metadata
+        total_instances = meta.num_prefill_instances + meta.num_decode_instances
+        if total_instances <= 0 or meta.measured_qps <= 0:
+            return 1.0
+        return meta.measured_qps / total_instances
+
+    def _instances_for_qps(self, qps: float) -> int:
+        """Return the minimum whole number of instances that can serve ``qps``.
+
+        Zero or negative traffic needs no instances.
+        """
+        if qps <= 0:
+            return 0
+        # Round away float noise before taking the ceiling: (0.1 * 3) / 0.1 is
+        # 3.0000000000000004, which would otherwise be billed as 4 instances.
+        raw = round(qps / self._qps_per_instance, 9)
+        return max(1, math.ceil(raw))
+
+    def calculate(
+        self,
+        profile: TrafficProfile,
+        gpu_cost_per_hour: float = 2.0,
+        currency: str = "USD",
+    ) -> GPUHourReport:
+        """Calculate GPU hours and costs for a traffic profile.
+
+        Args:
+            profile: Hourly QPS values; hours not listed count as 0 QPS.
+            gpu_cost_per_hour: Cost of running one instance for one hour.
+            currency: Label copied into the report; no conversion is done.
+
+        Returns:
+            Report with the 24-hour breakdown, daily and 30-day totals, and the
+            comparison against holding the peak instance count all day.
+        """
+        hour_map: dict[int, float] = {ht.hour: ht.qps for ht in profile.hours}
+
+        hourly_breakdown: list[HourBreakdown] = []
+        total_cost = 0.0
+        for h in range(24):
+            qps = hour_map.get(h, 0.0)
+            instances = self._instances_for_qps(qps)
+            gpu_h = float(instances)  # each instance runs for the full hour
+            cost = gpu_h * gpu_cost_per_hour
+            total_cost += cost
+            hourly_breakdown.append(
+                HourBreakdown(
+                    hour=h,
+                    qps=qps,
+                    required_instances=instances,
+                    gpu_hours=gpu_h,
+                    cost=round(cost, 4),
+                )
+            )
+
+        # Always 24 rows, so max()/min() are safe; ties resolve to the earliest hour.
+        peak = max(hourly_breakdown, key=lambda hb: hb.qps)
+        off_peak = min(hourly_breakdown, key=lambda hb: hb.qps)
+        total_gpu_hours = float(sum(hb.required_instances for hb in hourly_breakdown))
+
+        # Fixed provisioning holds the peak instance count for all 24 hours.
+        fixed_gpu_hours = peak.required_instances * 24.0
+        fixed_cost = fixed_gpu_hours * gpu_cost_per_hour
+        saved_gpu_hours = fixed_gpu_hours - total_gpu_hours
+        # An all-idle profile has no fleet to compare against; report 0 for both ratios.
+        has_fleet = fixed_gpu_hours > 0
+        savings_pct = saved_gpu_hours / fixed_gpu_hours * 100.0 if has_fleet else 0.0
+        avg_util = total_gpu_hours / fixed_gpu_hours if has_fleet else 0.0
+
+        scaling_savings = ScalingSavings(
+            fixed_daily_gpu_hours=fixed_gpu_hours,
+            dynamic_daily_gpu_hours=total_gpu_hours,
+            saved_gpu_hours=round(saved_gpu_hours, 2),
+            savings_percent=round(savings_pct, 2),
+            fixed_daily_cost=round(fixed_cost, 2),
+            dynamic_daily_cost=round(total_cost, 2),
+            saved_cost=round(fixed_cost - total_cost, 2),
+        )
+        return GPUHourReport(
+            profile_name=profile.name,
+            gpu_cost_per_hour=gpu_cost_per_hour,
+            currency=currency,
+            peak_qps=peak.qps,
+            peak_instances=peak.required_instances,
+            off_peak_qps=off_peak.qps,
+            off_peak_instances=off_peak.required_instances,
+            daily_gpu_hours=round(total_gpu_hours, 2),
+            monthly_gpu_hours=round(total_gpu_hours * 30, 2),
+            daily_cost=round(total_cost, 2),
+            monthly_cost=round(total_cost * 30, 2),
+            avg_utilization=round(avg_util, 4),
+            hourly_breakdown=hourly_breakdown,
+            scaling_savings=scaling_savings,
+            qps_per_instance=round(self._qps_per_instance, 4),
+        )
+
+
+def calculate_gpu_hours(
+    data: BenchmarkData,
+    profile: TrafficProfile,
+    gpu_cost_per_hour: float = 2.0,
+    currency: str = "USD",
+) -> dict:
+    """Run a one-shot GPU hour calculation and return the report as a dict.
+
+    Args:
+        data: Benchmark data supplying measured QPS and instance counts.
+        profile: Traffic profile to evaluate.
+        gpu_cost_per_hour: Cost of one GPU instance for one hour.
+        currency: Currency label recorded in the report.
+
+    Returns:
+        The ``GPUHourReport`` produced by ``GPUHourCalculator.calculate``,
+        converted with ``model_dump()``.
+    """
+    calculator = GPUHourCalculator(data)
+    return calculator.calculate(profile, gpu_cost_per_hour, currency).model_dump()
diff --git a/tests/test_gpu_hours.py b/tests/test_gpu_hours.py
new file mode 100644
index 0000000..d3120fa
--- /dev/null
+++ b/tests/test_gpu_hours.py
@@ -0,0 +1,317 @@
+"""Tests for GPU Hour Calculator (M116)."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from xpyd_plan.benchmark_models import (
+    BenchmarkData,
+    BenchmarkMetadata,
+    BenchmarkRequest,
+)
+from xpyd_plan.gpu_hours import (
+    GPUHourCalculator,
+    HourlyTraffic,
+    TrafficProfile,
+    calculate_gpu_hours,
+)
+
+
+def _make_data(
+    measured_qps: float = 100.0,
+    num_prefill: int = 2,
+    num_decode: int = 2,
+) -> BenchmarkData:
+    """Build benchmark data with 100 near-identical requests and the given metadata."""
+    metadata = BenchmarkMetadata(
+        num_prefill_instances=num_prefill,
+        num_decode_instances=num_decode,
+        total_instances=num_prefill + num_decode,
+        measured_qps=measured_qps,
+    )
+
+    # Only request_id and timestamp vary between requests.
+    requests = [
+        BenchmarkRequest(
+            request_id=f"r{idx}",
+            prompt_tokens=100,
+            output_tokens=50,
+            ttft_ms=20.0,
+            tpot_ms=10.0,
+            total_latency_ms=30.0,
+            timestamp=float(idx),
+        )
+        for idx in range(100)
+    ]
+    return BenchmarkData(metadata=metadata, requests=requests)
+
+
+def _make_profile(hours: list[tuple[int, float]] | None = None) -> TrafficProfile:
+    """Build a profile named "test-profile"; the default is 50 QPS for all 24 hours."""
+    pairs = [(h, 50.0) for h in range(24)] if hours is None else hours
+    entries = [HourlyTraffic(hour=hour, qps=qps) for hour, qps in pairs]
+    return TrafficProfile(
+        hours=entries,
+        name="test-profile",
+    )
+
+
+# --- Model tests ---
+
+
+class TestHourlyTraffic:
+    def test_valid(self):
+        ht = HourlyTraffic(hour=0, qps=10.0)
+        assert ht.hour == 0
+        assert ht.qps == 10.0
+
+    def test_invalid_hour(self):
+        with pytest.raises(ValueError):  # pydantic's ValidationError subclasses ValueError
+            HourlyTraffic(hour=24, qps=10.0)  # 24 is the first value past the 0-23 range
+
+    def test_negative_qps(self):
+        with pytest.raises(ValueError):
+            HourlyTraffic(hour=0, qps=-1.0)
+
+
+class TestTrafficProfile:
+    """Validation and ordering behaviour of ``TrafficProfile``."""
+
+    def test_valid(self):
+        profile = _make_profile([(0, 10.0), (12, 50.0)])
+        assert len(profile.hours) == 2
+
+    def test_duplicate_hours_rejected(self):
+        """The validator's message must surface through pydantic's ValidationError."""
+        duplicated = [HourlyTraffic(hour=0, qps=10.0), HourlyTraffic(hour=0, qps=20.0)]
+        # ValidationError subclasses ValueError, so this stays narrow without a new import.
+        with pytest.raises(ValueError, match="Duplicate hour: 0"):
+            TrafficProfile(hours=duplicated)
+
+    def test_sorted_by_hour(self):
+        profile = TrafficProfile(
+            hours=[
+                HourlyTraffic(hour=12, qps=50.0),
+                HourlyTraffic(hour=0, qps=10.0),
+            ],
+        )
+        assert profile.hours[0].hour == 0
+        assert profile.hours[1].hour == 12
+
+
+# --- Calculator tests ---
+
+
+class TestGPUHourCalculator:
+    def test_qps_per_instance(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        assert calc._qps_per_instance == 25.0  # 100 / 4
+
+    def test_instances_for_qps(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        # 25 QPS per instance, so 50 QPS needs 2 instances
+        assert calc._instances_for_qps(50.0) == 2
+        # 51 needs 3
+        assert calc._instances_for_qps(51.0) == 3
+        # 0 needs 0
+        assert calc._instances_for_qps(0.0) == 0
+
+    def test_uniform_traffic(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        profile = _make_profile()  # 50 QPS every hour
+        report = calc.calculate(profile, gpu_cost_per_hour=2.0)
+        # 50 QPS / 25 per instance = 2 instances every hour
+        assert report.peak_instances == 2
+        assert report.off_peak_instances == 2
+        assert report.daily_gpu_hours == 48.0  # 2 * 24
+        assert report.monthly_gpu_hours == 48.0 * 30
+
+    def test_variable_traffic(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        # Peak at hour 12 (100 QPS = 4 instances), off-peak at hour 0 (10 QPS = 1 instance)
+        hours = [(0, 10.0), (12, 100.0)]
+        profile = _make_profile(hours)
+        report = calc.calculate(profile, gpu_cost_per_hour=3.0)
+        assert report.peak_qps == 100.0
+        assert report.peak_instances == 4
+        assert report.off_peak_qps == 0.0  # unspecified hours = 0
+        assert report.off_peak_instances == 0
+
+    def test_scaling_savings(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        # Only 1 hour of peak traffic
+        hours = [(12, 100.0)]
+        profile = _make_profile(hours)
+        report = calc.calculate(profile, gpu_cost_per_hour=2.0)
+        s = report.scaling_savings
+        # Fixed = 4 * 24 = 96 GPU hours
+        assert s.fixed_daily_gpu_hours == 96.0
+        # Dynamic = 4 (hour 12) + 0 (other 23 hours) = 4
+        assert s.dynamic_daily_gpu_hours == 4.0
+        assert s.saved_gpu_hours == 92.0
+        assert s.savings_percent == pytest.approx(95.83)  # round(92 / 96 * 100, 2)
+
+    def test_hourly_breakdown_has_24_entries(self):
+        data = _make_data()
+        calc = GPUHourCalculator(data)
+        profile = _make_profile([(6, 50.0)])
+        report = calc.calculate(profile)
+        assert len(report.hourly_breakdown) == 24
+
+    def test_zero_qps_hours(self):
+        data = _make_data()
+        calc = GPUHourCalculator(data)
+        profile = _make_profile([(12, 50.0)])
+        report = calc.calculate(profile)
+        # Hour 0 should have 0 instances
+        h0 = report.hourly_breakdown[0]
+        assert h0.required_instances == 0
+        assert h0.gpu_hours == 0.0
+
+    def test_cost_calculation(self):
+        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
+        calc = GPUHourCalculator(data)
+        profile = _make_profile()  # 50 QPS * 24h
+        report = calc.calculate(profile, gpu_cost_per_hour=5.0, currency="EUR")
+        assert report.currency == "EUR"
+        assert report.daily_cost == 48.0 * 5.0  # 2 instances * 24h * 5.0 per hour
+        assert report.monthly_cost == report.daily_cost * 30
+
+    def test_report_model_dump(self):
+        data = _make_data()
+        calc = GPUHourCalculator(data)
+        profile = _make_profile([(12, 50.0)])
+        report = calc.calculate(profile)
+        d = report.model_dump()
+        assert "hourly_breakdown" in d
+        assert "scaling_savings" in d
+        assert "qps_per_instance" in d
+
+    def test_low_measured_qps(self):
+        data = _make_data(measured_qps=0.5, num_prefill=1, num_decode=1)
+        calc = GPUHourCalculator(data)
+        assert calc._qps_per_instance == 0.25
+
+    def test_low_instances(self):
+        data = _make_data(measured_qps=100.0, num_prefill=1, num_decode=1)
+        calc = GPUHourCalculator(data)
+        assert calc._qps_per_instance == 50.0
+
+
+# --- Convenience function ---
+
+
+class TestCalculateGPUHours:
+    """Checks for the module-level ``calculate_gpu_hours`` wrapper."""
+
+    def test_returns_dict(self):
+        single_hour = _make_profile([(12, 50.0)])
+        payload = calculate_gpu_hours(_make_data(), single_hour, gpu_cost_per_hour=2.0)
+        assert isinstance(payload, dict)
+        assert {"daily_gpu_hours", "scaling_savings"} <= payload.keys()
+
+    def test_custom_currency(self):
+        single_hour = _make_profile([(12, 50.0)])
+        payload = calculate_gpu_hours(_make_data(), single_hour, currency="CNY")
+        # The currency label is passed through to the report unchanged.
+        assert payload["currency"] == "CNY"
+
+
+# --- Public imports ---
+
+
+class TestPublicImports:
+    def test_imports_from_package(self):
+        """Every GPU-hour symbol is reachable from the top-level package."""
+        import xpyd_plan
+
+        expected = (
+            "GPUHourCalculator GPUHourReport HourBreakdown HourlyTraffic "
+            "ScalingSavings TrafficProfile calculate_gpu_hours"
+        ).split()
+        missing = [name for name in expected if not hasattr(xpyd_plan, name)]
+        assert not missing
+
+
+# --- CLI test ---
+
+
+class TestCLI:
+    def test_gpu_hours_json_output(self, tmp_path: Path, capsys):
+        """Run the handler with JSON output and check the payload parses.
+
+        Uses pytest's ``capsys`` fixture, which captures ``sys.stdout`` and restores
+        it automatically even when the handler raises.
+        """
+        import argparse
+
+        import yaml
+
+        from xpyd_plan.cli._gpu_hours import _run
+
+        # Benchmark file holding the JSON form of BenchmarkData.model_dump().
+        data = _make_data()
+        bench_path = tmp_path / "bench.json"
+        bench_path.write_text(json.dumps(data.model_dump()))
+
+        # Two busy hours; every other hour defaults to 0 QPS.
+        profile_data = {
+            "name": "test",
+            "hours": [{"hour": 8, "qps": 50.0}, {"hour": 20, "qps": 10.0}],
+        }
+        profile_path = tmp_path / "traffic.yaml"
+        profile_path.write_text(yaml.dump(profile_data))
+
+        args = argparse.Namespace(
+            benchmark=str(bench_path),
+            traffic_profile=str(profile_path),
+            gpu_cost=2.0,
+            currency="USD",
+            output_format="json",
+        )
+        _run(args)
+
+        output = json.loads(capsys.readouterr().out)
+        assert "daily_gpu_hours" in output
+        assert "scaling_savings" in output
+        # Values that do not depend on how the benchmark file is parsed.
+        assert output["profile_name"] == "test"
+        assert output["currency"] == "USD"
+        assert len(output["hourly_breakdown"]) == 24
+
+
+# --- Edge cases ---
+
+
+class TestEdgeCases:
+    """Boundary profiles: fully specified, single hour, and flat traffic."""
+
+    def test_all_24_hours_specified(self):
+        """A fully specified ramp peaks at its last hour."""
+        calc = GPUHourCalculator(_make_data(measured_qps=100.0, num_prefill=2, num_decode=2))
+        ramp = _make_profile([(h, float(h * 5 + 10)) for h in range(24)])
+        report = calc.calculate(ramp)
+        assert len(report.hourly_breakdown) == 24
+        assert report.peak_qps == 23 * 5 + 10  # hour 23
+
+    def test_single_hour_profile(self):
+        """Unspecified hours count as 0 QPS, so they define the off-peak."""
+        calc = GPUHourCalculator(_make_data())
+        report = calc.calculate(_make_profile([(12, 100.0)]))
+        assert report.peak_qps == 100.0
+        assert report.off_peak_qps == 0.0
+
+    def test_avg_utilization_range(self):
+        """Utilization is reported as a fraction between 0 and 1."""
+        calc = GPUHourCalculator(_make_data())
+        flat = _make_profile()
+        report = calc.calculate(flat)
+        assert 0.0 <= report.avg_utilization <= 1.0