From 0590a60ae26e49b3881506adab6eb63a0a5da220 Mon Sep 17 00:00:00 2001 From: hlin99 Date: Mon, 6 Apr 2026 14:08:17 +0800 Subject: [PATCH] feat: GPU Hour Calculator (M116) - GPUHourCalculator class in gpu_hours.py - TrafficProfile, HourlyTraffic, GPUHourReport, ScalingSavings, HourBreakdown models - 24-hour traffic profile with hourly QPS mapping to required instances - Auto-scaling savings estimation (fixed vs dynamic provisioning) - CLI gpu-hours subcommand with --benchmark, --traffic-profile, --gpu-cost - Programmatic calculate_gpu_hours() API - 24 new tests Closes #255 --- ROADMAP.md | 19 +- docs/iterations/current.md | 3 +- src/xpyd_plan/__init__.py | 19 ++ src/xpyd_plan/cli/_gpu_hours.py | 159 ++++++++++++++++ src/xpyd_plan/cli/_main.py | 6 + src/xpyd_plan/gpu_hours.py | 228 +++++++++++++++++++++++ tests/test_gpu_hours.py | 317 ++++++++++++++++++++++++++++++++ 7 files changed, 747 insertions(+), 4 deletions(-) create mode 100644 src/xpyd_plan/cli/_gpu_hours.py create mode 100644 src/xpyd_plan/gpu_hours.py create mode 100644 tests/test_gpu_hours.py diff --git a/ROADMAP.md b/ROADMAP.md index d2db68a..e67b3a9 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1544,9 +1544,9 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be - Programmatic `compare_backends()` API - ~25 new tests -### M115 🔄 Workload Mix Optimizer +### M115 ✅ Workload Mix Optimizer -*In progress* +*Completed — PR #254* - `WorkloadMixOptimizer` class in `workload_mix.py` - `WorkloadSpec`, `WorkloadAllocation`, `MixOptimizationResult` Pydantic models @@ -1555,4 +1555,17 @@ Help users find the **optimal Prefill:Decode instance ratio** based on **real be - Support shared vs dedicated instance pools - CLI `workload-mix` subcommand with `--workload` (repeatable), `--total-gpus`, table + JSON output - Programmatic `optimize_workload_mix()` API -- ~25 new tests +- 32 new tests + +### M116 🔄 GPU Hour Calculator + +*In progress* + +- `GPUHourCalculator` class in `gpu_hours.py` +- 
`TrafficProfile`, `HourlyTraffic`, `GPUHourReport`, `ScalingSavings`, `HourBreakdown` Pydantic models +- 24-hour traffic profile input (hourly QPS values) +- Map each hour's QPS to required instances using measured benchmark capacity +- Auto-scaling savings estimation (fixed vs dynamic provisioning) +- CLI `gpu-hours` subcommand with `--benchmark`, `--traffic-profile`, `--gpu-cost`, table + JSON output +- Programmatic `calculate_gpu_hours()` API +- ~24 new tests diff --git a/docs/iterations/current.md b/docs/iterations/current.md index b9b57d2..c3d1dd0 100644 --- a/docs/iterations/current.md +++ b/docs/iterations/current.md @@ -67,4 +67,5 @@ The project has completed **110 milestones**, covering the full feature chain fr | 6 | 2026-04-06 | M112 TensorRT-LLM Benchmark Format Importer | ✅ merged | PR #248, both bots approved | | 7 | 2026-04-06 | M113 TensorRT-LLM Benchmark Command Generator | ✅ merged | PR #250, both bots approved | | 8 | 2026-04-06 | M114 Multi-Backend Comparison Report | ✅ merged | PR #252, both bots approved | -| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ⏳ pending review | Issue #253 | +| 9 | 2026-04-06 | M115 Workload Mix Optimizer | ✅ merged | PR #254, both bots approved | +| 10 | 2026-04-06 | M116 GPU Hour Calculator | ⏳ pending review | Issue #255 | diff --git a/src/xpyd_plan/__init__.py b/src/xpyd_plan/__init__.py index f15d9eb..96d3f46 100644 --- a/src/xpyd_plan/__init__.py +++ b/src/xpyd_plan/__init__.py @@ -1535,3 +1535,22 @@ "WorkloadSpec", "optimize_workload_mix", ] +from xpyd_plan.gpu_hours import ( # noqa: E402 + GPUHourCalculator, + GPUHourReport, + HourBreakdown, + HourlyTraffic, + ScalingSavings, + TrafficProfile, + calculate_gpu_hours, +) + +__all__ += [ + "GPUHourCalculator", + "GPUHourReport", + "HourBreakdown", + "HourlyTraffic", + "ScalingSavings", + "TrafficProfile", + "calculate_gpu_hours", +] diff --git a/src/xpyd_plan/cli/_gpu_hours.py b/src/xpyd_plan/cli/_gpu_hours.py new file mode 100644 index 0000000..fa35436 --- 
"""CLI subcommand for GPU hour calculation."""

from __future__ import annotations

import argparse
import json
import math
import sys
from pathlib import Path
from typing import Any

import yaml

from xpyd_plan.gpu_hours import (
    GPUHourCalculator,
    GPUHourReport,
    HourlyTraffic,
    TrafficProfile,
)


def _non_negative_float(text: str) -> float:
    """Parse a command-line value as a finite float that is >= 0.

    Used as the argparse ``type=`` for ``--gpu-cost`` so that a negative,
    NaN or infinite rate is rejected at parse time with a usage error.

    Raises:
        argparse.ArgumentTypeError: If ``text`` is not a float, or the
            parsed value is negative or not finite.
    """
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {text!r}") from None
    if not math.isfinite(value) or value < 0:
        raise argparse.ArgumentTypeError(
            f"must be a finite number >= 0, got {text!r}"
        )
    return value


def register(subparsers: Any) -> None:
    """Register the gpu-hours subcommand.

    Args:
        subparsers: Object exposing ``add_parser`` (the value returned by
            ``ArgumentParser.add_subparsers()``).
    """
    p = subparsers.add_parser(
        "gpu-hours",
        help="Estimate GPU hours and costs from traffic profiles",
        description=(
            "Given benchmark data and a daily traffic profile (hourly QPS), "
            "estimate total GPU hours, costs, and auto-scaling savings."
        ),
    )
    p.add_argument(
        "--benchmark",
        required=True,
        help="Benchmark JSON file",
    )
    p.add_argument(
        "--traffic-profile",
        required=True,
        help="Traffic profile YAML file (hourly QPS schedule)",
    )
    p.add_argument(
        "--gpu-cost",
        type=_non_negative_float,
        default=2.0,
        help="GPU cost per instance per hour (default: 2.0)",
    )
    p.add_argument(
        "--currency",
        default="USD",
        help="Currency label (default: USD)",
    )
    p.add_argument(
        "--output-format",
        choices=["table", "json"],
        default="table",
        help="Output format (default: table)",
    )
    p.set_defaults(func=_run)


def _load_traffic_profile(profile_path: Path) -> TrafficProfile:
    """Read a traffic-profile YAML file and build a ``TrafficProfile``.

    The document must be a mapping with a non-empty ``hours`` list whose
    entries are mappings carrying ``hour`` and ``qps``.  An optional
    ``name`` key labels the profile; when it is absent or null the file
    stem is used.

    Raises:
        ValueError: If the document does not have the shape described above
            (model validation failures raised while building the entries
            propagate unchanged).
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(profile_path, encoding="utf-8") as f:
        profile_data = yaml.safe_load(f)

    # safe_load returns None for an empty file and a scalar/list for
    # documents that are not mappings; report that instead of a TypeError.
    if not isinstance(profile_data, dict):
        raise ValueError(
            f"{profile_path}: expected a YAML mapping with an 'hours' list"
        )

    entries = profile_data.get("hours")
    if not isinstance(entries, list) or not entries:
        raise ValueError(f"{profile_path}: 'hours' must be a non-empty list")

    hours = []
    for index, entry in enumerate(entries):
        if not isinstance(entry, dict) or "hour" not in entry or "qps" not in entry:
            raise ValueError(
                f"{profile_path}: hours[{index}] must be a mapping "
                "with 'hour' and 'qps' keys"
            )
        hours.append(HourlyTraffic(hour=entry["hour"], qps=entry["qps"]))

    name = profile_data.get("name")
    return TrafficProfile(
        hours=hours,
        # `name: null` and numeric names would otherwise fail str validation.
        name=profile_path.stem if name is None else str(name),
    )


def _run(args: argparse.Namespace) -> None:
    """Execute gpu-hours subcommand.

    Loads the benchmark and the traffic profile named on the command line,
    runs the calculator, and writes the report to stdout as JSON or as
    Rich tables depending on ``args.output_format``.

    Raises:
        SystemExit: If the traffic profile is not valid YAML or does not
            describe a valid profile; the message names the problem.
    """
    from xpyd_plan.bench_adapter import load_benchmark_auto

    data = load_benchmark_auto(Path(args.benchmark))

    try:
        profile = _load_traffic_profile(Path(args.traffic_profile))
    except (ValueError, yaml.YAMLError) as exc:
        # A bad input file is a user error: exit with a message, not a traceback.
        raise SystemExit(f"gpu-hours: invalid traffic profile: {exc}") from exc

    calc = GPUHourCalculator(data)
    report = calc.calculate(
        profile,
        gpu_cost_per_hour=args.gpu_cost,
        currency=args.currency,
    )

    if args.output_format == "json":
        json.dump(report.model_dump(), sys.stdout, indent=2)
        sys.stdout.write("\n")
    else:
        _print_table(report)


def _print_table(report: GPUHourReport) -> None:
    """Print report as Rich tables: summary, scaling savings, hourly breakdown."""
    from rich.console import Console
    from rich.markup import escape
    from rich.table import Table

    console = Console()

    # The profile name and currency label come from user input; escape them
    # so square brackets are printed literally instead of parsed as markup.
    profile_name = escape(report.profile_name)
    currency = escape(report.currency)

    # Summary
    console.print(f"\n[bold]GPU Hour Report: {profile_name}[/bold]\n")

    summary = Table(title="Summary")
    summary.add_column("Metric", style="cyan")
    summary.add_column("Value", justify="right")
    summary_rows = [
        ("QPS per Instance", f"{report.qps_per_instance:.2f}"),
        ("Peak QPS", f"{report.peak_qps:.1f}"),
        ("Peak Instances", str(report.peak_instances)),
        ("Off-Peak QPS", f"{report.off_peak_qps:.1f}"),
        ("Off-Peak Instances", str(report.off_peak_instances)),
        ("Avg Utilization", f"{report.avg_utilization:.1%}"),
        ("Daily GPU Hours", f"{report.daily_gpu_hours:.1f}"),
        ("Monthly GPU Hours", f"{report.monthly_gpu_hours:.1f}"),
        ("Daily Cost", f"{report.daily_cost:.2f} {currency}"),
        ("Monthly Cost", f"{report.monthly_cost:.2f} {currency}"),
    ]
    for metric, value in summary_rows:
        summary.add_row(metric, value)
    console.print(summary)

    # Scaling savings
    s = report.scaling_savings
    savings = Table(title="Auto-Scaling Savings")
    savings.add_column("Metric", style="cyan")
    savings.add_column("Fixed", justify="right")
    savings.add_column("Dynamic", justify="right")
    savings.add_column("Saved", justify="right")
    savings.add_row(
        "Daily GPU Hours",
        f"{s.fixed_daily_gpu_hours:.1f}",
        f"{s.dynamic_daily_gpu_hours:.1f}",
        f"{s.saved_gpu_hours:.1f} ({s.savings_percent:.1f}%)",
    )
    savings.add_row(
        f"Daily Cost ({currency})",
        f"{s.fixed_daily_cost:.2f}",
        f"{s.dynamic_daily_cost:.2f}",
        f"{s.saved_cost:.2f}",
    )
    console.print(savings)

    # Hourly breakdown
    hourly = Table(title="Hourly Breakdown")
    hourly.add_column("Hour", justify="right")
    hourly.add_column("QPS", justify="right")
    hourly.add_column("Instances", justify="right")
    hourly.add_column("GPU Hours", justify="right")
    hourly.add_column(f"Cost ({currency})", justify="right")
    for hb in report.hourly_breakdown:
        hourly.add_row(
            f"{hb.hour:02d}:00",
            f"{hb.qps:.1f}",
            str(hb.required_instances),
            f"{hb.gpu_hours:.1f}",
            f"{hb.cost:.2f}",
        )
    console.print(hourly)
"""GPU Hour Calculator — estimate GPU hours and costs from traffic profiles."""

from __future__ import annotations

import math
import warnings
from typing import List

from pydantic import BaseModel, Field, field_validator

from xpyd_plan.benchmark_models import BenchmarkData

_HOURS_PER_DAY = 24
_DAYS_PER_MONTH = 30  # monthly figures assume a 30-day month
_FALLBACK_QPS_PER_INSTANCE = 1.0  # used when the benchmark metadata is unusable


class HourlyTraffic(BaseModel):
    """Traffic specification for a single hour."""

    hour: int = Field(..., ge=0, le=23, description="Hour of day (0-23)")
    # inf would pass `ge=0` and later overflow in math.ceil; reject it here.
    qps: float = Field(
        ..., ge=0, allow_inf_nan=False, description="Expected QPS during this hour"
    )


class TrafficProfile(BaseModel):
    """24-hour traffic profile.

    Between 1 and 24 hourly entries are accepted; each hour may appear at
    most once, and entries are stored sorted by hour.
    """

    hours: List[HourlyTraffic] = Field(
        ..., min_length=1, max_length=24, description="Hourly traffic specs"
    )
    name: str = Field(default="default", description="Profile name")

    @field_validator("hours")
    @classmethod
    def validate_unique_hours(cls, v: list[HourlyTraffic]) -> list[HourlyTraffic]:
        """Ensure no duplicate hours and return the entries sorted by hour."""
        seen: set[int] = set()
        for ht in v:
            if ht.hour in seen:
                raise ValueError(f"Duplicate hour: {ht.hour}")
            seen.add(ht.hour)
        return sorted(v, key=lambda x: x.hour)


class HourBreakdown(BaseModel):
    """Per-hour resource and cost breakdown."""

    hour: int = Field(..., description="Hour of day")
    qps: float = Field(..., description="Traffic QPS")
    required_instances: int = Field(..., description="Instances needed")
    gpu_hours: float = Field(..., description="GPU hours consumed")
    cost: float = Field(..., description="Cost for this hour")


class ScalingSavings(BaseModel):
    """Savings from auto-scaling vs fixed provisioning."""

    fixed_daily_gpu_hours: float = Field(..., description="GPU hours with fixed instances")
    dynamic_daily_gpu_hours: float = Field(
        ..., description="GPU hours with auto-scaling"
    )
    saved_gpu_hours: float = Field(..., description="GPU hours saved per day")
    savings_percent: float = Field(..., description="Percentage savings")
    fixed_daily_cost: float = Field(..., description="Daily cost with fixed instances")
    dynamic_daily_cost: float = Field(
        ..., description="Daily cost with auto-scaling"
    )
    saved_cost: float = Field(..., description="Cost saved per day")


class GPUHourReport(BaseModel):
    """Complete GPU hour calculation report."""

    profile_name: str = Field(..., description="Traffic profile name")
    gpu_cost_per_hour: float = Field(..., description="GPU hourly rate")
    currency: str = Field(default="USD", description="Currency")
    peak_qps: float = Field(..., description="Peak traffic QPS")
    peak_instances: int = Field(..., description="Instances needed at peak")
    off_peak_qps: float = Field(..., description="Minimum traffic QPS")
    off_peak_instances: int = Field(..., description="Instances needed at off-peak")
    daily_gpu_hours: float = Field(..., description="Total GPU hours per day")
    monthly_gpu_hours: float = Field(..., description="Total GPU hours per month (30d)")
    daily_cost: float = Field(..., description="Total cost per day")
    monthly_cost: float = Field(..., description="Total cost per month (30d)")
    avg_utilization: float = Field(
        ..., description="Average utilization (dynamic/peak)"
    )
    hourly_breakdown: List[HourBreakdown] = Field(
        ..., description="Per-hour breakdown"
    )
    scaling_savings: ScalingSavings = Field(
        ..., description="Auto-scaling savings analysis"
    )
    qps_per_instance: float = Field(
        ..., description="Estimated QPS capacity per instance"
    )


class GPUHourCalculator:
    """Calculate GPU hours and costs from benchmark data and traffic profiles."""

    def __init__(self, data: BenchmarkData) -> None:
        """Store the benchmark and derive the per-instance QPS capacity from it."""
        self._data = data
        self._qps_per_instance = self._estimate_qps_per_instance()

    def _estimate_qps_per_instance(self) -> float:
        """Estimate QPS capacity per instance from benchmark data.

        Capacity is the measured QPS divided by the number of prefill plus
        decode instances.  When either figure is not positive the estimate
        falls back to 1.0 QPS per instance and a ``RuntimeWarning`` is
        emitted, because every figure derived from the fallback is a guess.
        """
        metadata = self._data.metadata
        total_instances = (
            metadata.num_prefill_instances + metadata.num_decode_instances
        )
        measured_qps = metadata.measured_qps
        if total_instances <= 0 or measured_qps <= 0:
            warnings.warn(
                "Benchmark metadata has no usable capacity "
                f"(instances={total_instances}, measured_qps={measured_qps}); "
                f"assuming {_FALLBACK_QPS_PER_INSTANCE} QPS per instance",
                RuntimeWarning,
                stacklevel=3,  # attribute the warning to the code building the calculator
            )
            return _FALLBACK_QPS_PER_INSTANCE
        return measured_qps / total_instances

    def _instances_for_qps(self, qps: float) -> int:
        """Calculate minimum instances needed for a given QPS.

        Returns 0 when ``qps`` is not positive, otherwise the demand divided
        by the per-instance capacity, rounded up (at least 1).

        Raises:
            ValueError: If ``qps`` is NaN or infinite.
        """
        if not math.isfinite(qps):
            raise ValueError(f"qps must be a finite number, got {qps!r}")
        if qps <= 0:
            return 0
        return max(1, math.ceil(qps / self._qps_per_instance))

    def calculate(
        self,
        profile: TrafficProfile,
        gpu_cost_per_hour: float = 2.0,
        currency: str = "USD",
    ) -> GPUHourReport:
        """Calculate GPU hours and costs for a traffic profile.

        Args:
            profile: Hourly QPS schedule; hours it does not list count as 0 QPS.
            gpu_cost_per_hour: Cost of one instance for one hour.
            currency: Label copied into the report.

        Returns:
            Report with a 24-entry hourly breakdown, daily and 30-day totals,
            and a comparison against keeping the peak instance count all day.

        Raises:
            ValueError: If ``gpu_cost_per_hour`` is negative, NaN or infinite.
        """
        if not math.isfinite(gpu_cost_per_hour) or gpu_cost_per_hour < 0:
            raise ValueError(
                f"gpu_cost_per_hour must be a finite number >= 0, got {gpu_cost_per_hour!r}"
            )

        # Build full 24-hour schedule (default to 0 QPS for unspecified hours)
        hour_map: dict[int, float] = {ht.hour: ht.qps for ht in profile.hours}

        hourly_breakdown: list[HourBreakdown] = []
        total_gpu_hours = 0.0
        total_cost = 0.0

        for h in range(_HOURS_PER_DAY):
            qps = hour_map.get(h, 0.0)
            instances = self._instances_for_qps(qps)
            gpu_h = float(instances)  # 1 hour per instance
            cost = gpu_h * gpu_cost_per_hour

            hourly_breakdown.append(
                HourBreakdown(
                    hour=h,
                    qps=qps,
                    required_instances=instances,
                    gpu_hours=gpu_h,
                    cost=round(cost, 4),
                )
            )
            total_gpu_hours += gpu_h
            total_cost += cost

        # The breakdown always has 24 entries, so max/min need no sentinel.
        # Both return the earliest hour on ties.
        peak = max(hourly_breakdown, key=lambda hb: hb.qps)
        off_peak = min(hourly_breakdown, key=lambda hb: hb.qps)
        peak_instances = peak.required_instances

        # Fixed provisioning = peak instances * 24h
        fixed_gpu_hours = float(peak_instances) * _HOURS_PER_DAY
        fixed_cost = fixed_gpu_hours * gpu_cost_per_hour
        saved_gpu_hours = fixed_gpu_hours - total_gpu_hours
        savings_pct = (
            saved_gpu_hours / fixed_gpu_hours * 100.0 if fixed_gpu_hours > 0 else 0.0
        )

        # Instance-hours actually used as a share of the always-at-peak footprint.
        avg_util = total_gpu_hours / fixed_gpu_hours if fixed_gpu_hours > 0 else 0.0

        scaling_savings = ScalingSavings(
            fixed_daily_gpu_hours=fixed_gpu_hours,
            dynamic_daily_gpu_hours=total_gpu_hours,
            saved_gpu_hours=round(saved_gpu_hours, 2),
            savings_percent=round(savings_pct, 2),
            fixed_daily_cost=round(fixed_cost, 2),
            dynamic_daily_cost=round(total_cost, 2),
            saved_cost=round(fixed_cost - total_cost, 2),
        )

        return GPUHourReport(
            profile_name=profile.name,
            gpu_cost_per_hour=gpu_cost_per_hour,
            currency=currency,
            peak_qps=peak.qps,
            peak_instances=peak_instances,
            off_peak_qps=off_peak.qps,
            off_peak_instances=off_peak.required_instances,
            daily_gpu_hours=round(total_gpu_hours, 2),
            monthly_gpu_hours=round(total_gpu_hours * _DAYS_PER_MONTH, 2),
            daily_cost=round(total_cost, 2),
            monthly_cost=round(total_cost * _DAYS_PER_MONTH, 2),
            avg_utilization=round(avg_util, 4),
            hourly_breakdown=hourly_breakdown,
            scaling_savings=scaling_savings,
            qps_per_instance=round(self._qps_per_instance, 4),
        )


def calculate_gpu_hours(
    data: BenchmarkData,
    profile: TrafficProfile,
    gpu_cost_per_hour: float = 2.0,
    currency: str = "USD",
) -> dict:
    """Convenience function for GPU hour calculation.

    Args:
        data: Benchmark data with measured QPS and instance counts.
        profile: 24-hour traffic profile.
        gpu_cost_per_hour: Cost per GPU instance per hour.
        currency: Currency label.

    Returns:
        Dictionary with GPU hour report.
    """
    calc = GPUHourCalculator(data)
    report = calc.calculate(
        profile, gpu_cost_per_hour=gpu_cost_per_hour, currency=currency
    )
    return report.model_dump()
"""Tests for GPU Hour Calculator (M116)."""

from __future__ import annotations

import argparse
import json
from pathlib import Path

import pytest
import yaml
from pydantic import ValidationError

from xpyd_plan.benchmark_models import (
    BenchmarkData,
    BenchmarkMetadata,
    BenchmarkRequest,
)
from xpyd_plan.gpu_hours import (
    GPUHourCalculator,
    HourlyTraffic,
    TrafficProfile,
    calculate_gpu_hours,
)


def _make_data(
    measured_qps: float = 100.0,
    num_prefill: int = 2,
    num_decode: int = 2,
) -> BenchmarkData:
    """Create minimal benchmark data: 100 identical requests plus metadata."""
    requests = [
        BenchmarkRequest(
            request_id=f"r{i}",
            prompt_tokens=100,
            output_tokens=50,
            ttft_ms=20.0,
            tpot_ms=10.0,
            total_latency_ms=30.0,
            timestamp=float(i),
        )
        for i in range(100)
    ]
    return BenchmarkData(
        metadata=BenchmarkMetadata(
            num_prefill_instances=num_prefill,
            num_decode_instances=num_decode,
            total_instances=num_prefill + num_decode,
            measured_qps=measured_qps,
        ),
        requests=requests,
    )


def _make_profile(hours: list[tuple[int, float]] | None = None) -> TrafficProfile:
    """Create a traffic profile; defaults to 50 QPS for every hour of the day."""
    if hours is None:
        hours = [(h, 50.0) for h in range(24)]
    return TrafficProfile(
        hours=[HourlyTraffic(hour=h, qps=q) for h, q in hours],
        name="test-profile",
    )


# --- Model tests ---


class TestHourlyTraffic:
    def test_valid(self):
        ht = HourlyTraffic(hour=0, qps=10.0)
        assert ht.hour == 0
        assert ht.qps == 10.0

    def test_invalid_hour(self):
        # Assert the validation error specifically, so an unrelated failure
        # (e.g. a typo raising TypeError) cannot make the test pass.
        with pytest.raises(ValidationError):
            HourlyTraffic(hour=25, qps=10.0)

    def test_negative_qps(self):
        with pytest.raises(ValidationError):
            HourlyTraffic(hour=0, qps=-1.0)


class TestTrafficProfile:
    def test_valid(self):
        profile = _make_profile([(0, 10.0), (12, 50.0)])
        assert len(profile.hours) == 2

    def test_duplicate_hours_rejected(self):
        with pytest.raises(ValidationError):
            TrafficProfile(
                hours=[
                    HourlyTraffic(hour=0, qps=10.0),
                    HourlyTraffic(hour=0, qps=20.0),
                ],
            )

    def test_sorted_by_hour(self):
        profile = TrafficProfile(
            hours=[
                HourlyTraffic(hour=12, qps=50.0),
                HourlyTraffic(hour=0, qps=10.0),
            ],
        )
        assert profile.hours[0].hour == 0
        assert profile.hours[1].hour == 12


# --- Calculator tests ---


class TestGPUHourCalculator:
    def test_qps_per_instance(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        assert calc._qps_per_instance == 25.0  # 100 / 4

    def test_instances_for_qps(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        # 25 QPS per instance, so 50 QPS needs 2 instances
        assert calc._instances_for_qps(50.0) == 2
        # 51 needs 3
        assert calc._instances_for_qps(51.0) == 3
        # 0 needs 0
        assert calc._instances_for_qps(0.0) == 0

    def test_uniform_traffic(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        profile = _make_profile()  # 50 QPS every hour
        report = calc.calculate(profile, gpu_cost_per_hour=2.0)
        # 50 QPS / 25 per instance = 2 instances every hour
        assert report.peak_instances == 2
        assert report.off_peak_instances == 2
        assert report.daily_gpu_hours == 48.0  # 2 * 24
        assert report.monthly_gpu_hours == 48.0 * 30

    def test_variable_traffic(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        # Peak at hour 12 (100 QPS = 4 instances), off-peak at hour 0 (10 QPS = 1 instance)
        hours = [(0, 10.0), (12, 100.0)]
        profile = _make_profile(hours)
        report = calc.calculate(profile, gpu_cost_per_hour=3.0)
        assert report.peak_qps == 100.0
        assert report.peak_instances == 4
        assert report.off_peak_qps == 0.0  # unspecified hours = 0
        assert report.off_peak_instances == 0

    def test_scaling_savings(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        # Only 1 hour of peak traffic
        hours = [(12, 100.0)]
        profile = _make_profile(hours)
        report = calc.calculate(profile, gpu_cost_per_hour=2.0)
        s = report.scaling_savings
        # Fixed = 4 * 24 = 96 GPU hours
        assert s.fixed_daily_gpu_hours == 96.0
        # Dynamic = 4 (hour 12) + 0 (other 23 hours) = 4
        assert s.dynamic_daily_gpu_hours == 4.0
        assert s.saved_gpu_hours == 92.0
        assert s.savings_percent > 95.0

    def test_hourly_breakdown_has_24_entries(self):
        data = _make_data()
        calc = GPUHourCalculator(data)
        profile = _make_profile([(6, 50.0)])
        report = calc.calculate(profile)
        assert len(report.hourly_breakdown) == 24

    def test_zero_qps_hours(self):
        data = _make_data()
        calc = GPUHourCalculator(data)
        profile = _make_profile([(12, 50.0)])
        report = calc.calculate(profile)
        # Hour 0 should have 0 instances
        h0 = report.hourly_breakdown[0]
        assert h0.required_instances == 0
        assert h0.gpu_hours == 0.0

    def test_cost_calculation(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        profile = _make_profile()  # 50 QPS * 24h
        report = calc.calculate(profile, gpu_cost_per_hour=5.0, currency="EUR")
        assert report.currency == "EUR"
        assert report.daily_cost == 48.0 * 5.0  # 2 instances * 24h * $5
        assert report.monthly_cost == report.daily_cost * 30

    def test_report_model_dump(self):
        data = _make_data()
        calc = GPUHourCalculator(data)
        profile = _make_profile([(12, 50.0)])
        report = calc.calculate(profile)
        d = report.model_dump()
        assert "hourly_breakdown" in d
        assert "scaling_savings" in d
        assert "qps_per_instance" in d

    def test_low_measured_qps(self):
        data = _make_data(measured_qps=0.5, num_prefill=1, num_decode=1)
        calc = GPUHourCalculator(data)
        assert calc._qps_per_instance == 0.25

    def test_low_instances(self):
        data = _make_data(measured_qps=100.0, num_prefill=1, num_decode=1)
        calc = GPUHourCalculator(data)
        assert calc._qps_per_instance == 50.0


# --- Convenience function ---


class TestCalculateGPUHours:
    def test_returns_dict(self):
        data = _make_data()
        profile = _make_profile([(12, 50.0)])
        result = calculate_gpu_hours(data, profile, gpu_cost_per_hour=2.0)
        assert isinstance(result, dict)
        assert "daily_gpu_hours" in result
        assert "scaling_savings" in result

    def test_custom_currency(self):
        data = _make_data()
        profile = _make_profile([(12, 50.0)])
        result = calculate_gpu_hours(data, profile, currency="CNY")
        assert result["currency"] == "CNY"


# --- Public imports ---


class TestPublicImports:
    def test_imports_from_package(self):
        import xpyd_plan

        assert hasattr(xpyd_plan, "GPUHourCalculator")
        assert hasattr(xpyd_plan, "GPUHourReport")
        assert hasattr(xpyd_plan, "HourBreakdown")
        assert hasattr(xpyd_plan, "HourlyTraffic")
        assert hasattr(xpyd_plan, "ScalingSavings")
        assert hasattr(xpyd_plan, "TrafficProfile")
        assert hasattr(xpyd_plan, "calculate_gpu_hours")


# --- CLI test ---


class TestCLI:
    def test_gpu_hours_json_output(self, tmp_path: Path, capsys):
        """Test CLI produces valid JSON output."""
        from xpyd_plan.cli._gpu_hours import _run

        data = _make_data()
        bench_path = tmp_path / "bench.json"
        bench_path.write_text(json.dumps(data.model_dump()))

        profile_data = {
            "name": "test",
            "hours": [{"hour": 8, "qps": 50.0}, {"hour": 20, "qps": 10.0}],
        }
        profile_path = tmp_path / "traffic.yaml"
        profile_path.write_text(yaml.dump(profile_data))

        args = argparse.Namespace(
            benchmark=str(bench_path),
            traffic_profile=str(profile_path),
            gpu_cost=2.0,
            currency="USD",
            output_format="json",
        )

        # capsys captures and restores stdout itself, so the test never
        # mutates sys.stdout by hand.
        _run(args)

        output = json.loads(capsys.readouterr().out)
        assert "daily_gpu_hours" in output
        assert "scaling_savings" in output


# --- Edge cases ---


class TestEdgeCases:
    def test_all_24_hours_specified(self):
        data = _make_data(measured_qps=100.0, num_prefill=2, num_decode=2)
        calc = GPUHourCalculator(data)
        hours = [(h, float(h * 5 + 10)) for h in range(24)]
        profile = _make_profile(hours)
        report = calc.calculate(profile)
        assert len(report.hourly_breakdown) == 24
        assert report.peak_qps == 23 * 5 + 10  # hour 23

    def test_single_hour_profile(self):
        data = _make_data()
        calc = GPUHourCalculator(data)
        profile = _make_profile([(12, 100.0)])
        report = calc.calculate(profile)
        assert report.off_peak_qps == 0.0
        assert report.peak_qps == 100.0

    def test_avg_utilization_range(self):
        data = _make_data()
        calc = GPUHourCalculator(data)
        profile = _make_profile()
        report = calc.calculate(profile)
        assert 0.0 <= report.avg_utilization <= 1.0