|
| 1 | +# Copyright 2026 Kevin (AluminatiAI) |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +# |
| 15 | +# AluminatiAI — https://github.com/AgentMulder404/AluminatAI |
| 16 | +""" |
| 17 | +aluminatiai benchmark — GPU energy baseline measurement. |
| 18 | +
|
| 19 | +Samples power draw + utilization for --duration seconds, then prints a |
| 20 | +rich terminal report. With --upload, posts results to the AluminatiAI |
| 21 | +benchmarks API using the configured API key. |
| 22 | +
|
| 23 | +Usage: |
| 24 | + aluminatiai benchmark [--gpu N] [--duration SECONDS] [--upload] |
| 25 | + [--model-tag TAG] [--throughput TOKENS_PER_SEC] [--framework FRAMEWORK] |
| 26 | +""" |
| 27 | +from __future__ import annotations |
| 28 | + |
| 29 | +import argparse |
| 30 | +import json |
| 31 | +import sys |
| 32 | +import time |
| 33 | +import urllib.error |
| 34 | +import urllib.request |
| 35 | +from typing import Optional |
| 36 | + |
| 37 | +try: |
| 38 | + import pynvml # type: ignore[import-untyped] |
| 39 | +except ImportError: |
| 40 | + pynvml = None # type: ignore[assignment] |
| 41 | + |
| 42 | +try: |
| 43 | + from rich.console import Console |
| 44 | + from rich.table import Table |
| 45 | + from rich.text import Text |
| 46 | + _rich = True |
| 47 | +except ImportError: |
| 48 | + _rich = False |
| 49 | + |
| 50 | + |
def _non_negative_int(text: str) -> int:
    """argparse ``type=`` callable: parse *text* as an integer >= 0."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {text!r}")
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _positive_int(text: str) -> int:
    """argparse ``type=`` callable: parse *text* as an integer >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {text!r}")
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def make_parser() -> argparse.ArgumentParser:
    """Build the argument parser for ``aluminatiai benchmark``.

    ``--gpu`` and ``--duration`` are validated at parse time: a negative GPU
    index or a non-positive duration makes argparse exit with a usage error
    (exit status 2) instead of failing later during sampling.

    Returns:
        A parser exposing ``gpu``, ``duration``, ``upload``, ``model_tag``,
        ``throughput`` and ``framework`` on the parsed namespace.
    """
    p = argparse.ArgumentParser(
        prog="aluminatiai benchmark",
        description="Measure GPU energy baseline and (optionally) upload to AluminatiAI.",
    )
    p.add_argument(
        "--gpu", type=_non_negative_int, default=0, metavar="N",
        help="GPU index to benchmark (default: 0)",
    )
    p.add_argument(
        "--duration", type=_positive_int, default=60, metavar="SECONDS",
        help="Sampling duration in seconds (default: 60)",
    )
    p.add_argument(
        "--upload", action="store_true",
        help="Upload results to AluminatiAI benchmarks API",
    )
    p.add_argument(
        "--model-tag", default="", metavar="TAG",
        help="Model tag label (e.g. llama-3-8b) for the upload",
    )
    p.add_argument(
        "--throughput", type=float, default=None, metavar="TOKENS_PER_SEC",
        help="Measured tokens/s (or samples/s) during the window. Enables kWh/1M tokens.",
    )
    p.add_argument(
        "--framework", default="unknown", metavar="FRAMEWORK",
        help="Inference framework: pytorch, jax, vllm, ollama, triton, etc.",
    )
    return p
| 81 | + |
| 82 | + |
def _resolve_arch(gpu_name: str) -> Optional[object]:
    """Look up the architecture spec for *gpu_name*, best-effort.

    The lookup is delegated to ``efficiency.gpu_specs.resolve_arch``. If that
    module cannot be imported, or the lookup raises for any reason, ``None``
    is returned so the caller can simply skip architecture-derived metrics.
    """
    arch: Optional[object] = None
    try:
        from efficiency.gpu_specs import resolve_arch  # type: ignore[import]
    except Exception:
        # Optional dependency: its absence must never break the benchmark.
        return arch
    try:
        arch = resolve_arch(gpu_name)
    except Exception:
        arch = None
    return arch
| 90 | + |
| 91 | + |
def _print_plain(report: dict) -> None:
    """Write the benchmark report to stdout as plain text.

    Used when ``rich`` is not installed. The TFLOP, token and framework rows
    are optional: each is emitted only when the report carries a usable value
    (non-``None`` metrics, a framework other than ``"unknown"``).
    """
    emit = print
    emit("\n=== AluminatiAI GPU Benchmark ===")
    emit(f" GPU : {report['gpu_name']} (index {report['gpu_index']})")
    emit(f" Duration : {report['duration_s']} s")
    emit(f" Avg power : {report['avg_power_w']:.1f} W")
    emit(f" Avg util : {report['avg_util_pct']:.1f} %")
    emit(f" J / GPU-hr : {report['j_per_gpu_hour']:,.0f}")
    emit(f" kWh / GPU-hr : {report['kwh_per_gpu_hour']:.4f}")

    tflop_cost = report.get("j_per_tflop")
    if tflop_cost is not None:
        emit(f" J / TFLOP : {tflop_cost:.4f}")

    token_cost = report.get("kwh_per_1m_tokens")
    if token_cost is not None:
        emit(f" kWh / 1M tok : {token_cost:.6f}")

    framework = report.get("framework")
    if framework and framework != "unknown":
        emit(f" Framework : {framework}")

    # Trailing blank line separates the report from whatever is printed next.
    emit()
| 107 | + |
| 108 | + |
def _print_rich(report: dict) -> None:
    """Render the benchmark report with ``rich`` as a two-column table.

    The table has no header and no box. The TFLOP, token and framework rows
    are optional and appear only when the report carries a usable value.
    """
    out = Console()
    out.print("\n[bold]AluminatiAI GPU Benchmark[/bold]", style="green")

    table = Table(show_header=False, box=None, padding=(0, 2))
    table.add_column("Field", style="dim")
    table.add_column("Value")

    # Rows that are always present, in display order.
    rows = [
        ("GPU", f"{report['gpu_name']} (index {report['gpu_index']})"),
        ("Duration", f"{report['duration_s']} s"),
        ("Avg power", f"{report['avg_power_w']:.1f} W"),
        ("Avg util", f"{report['avg_util_pct']:.1f} %"),
        ("J / GPU-hr", Text(f"{report['j_per_gpu_hour']:,.0f}", style="bold green")),
        ("kWh / GPU-hr", f"{report['kwh_per_gpu_hour']:.4f}"),
    ]

    tflop_cost = report.get("j_per_tflop")
    if tflop_cost is not None:
        rows.append(("J / TFLOP", f"{tflop_cost:.4f}"))

    token_cost = report.get("kwh_per_1m_tokens")
    if token_cost is not None:
        rows.append(
            ("kWh / 1M tokens", Text(f"{token_cost:.6f}", style="bold cyan"))
        )

    framework = report.get("framework")
    if framework and framework != "unknown":
        rows.append(("Framework", framework))

    for label, value in rows:
        table.add_row(label, value)

    out.print(table)
    out.print()
| 136 | + |
| 137 | + |
def _sample_gpu(handle: object, duration: int) -> tuple[list[float], list[float]]:
    """Poll power (watts) and GPU utilization (percent) once per second.

    Sampling is scheduled against a monotonic clock so the time spent inside
    the NVML calls does not stretch the window beyond *duration* seconds.
    A reading that fails with ``NVMLError`` is skipped, so the two returned
    lists may be shorter than *duration* and of different lengths.
    """
    power_samples: list[float] = []
    util_samples: list[float] = []
    started = time.monotonic()

    for tick in range(duration):
        try:
            # NVML reports power in milliwatts; convert to watts.
            power_samples.append(pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0)
        except pynvml.NVMLError:
            pass  # best-effort: one failed reading should not abort the run

        try:
            util_samples.append(
                float(pynvml.nvmlDeviceGetUtilizationRates(handle).gpu)
            )
        except pynvml.NVMLError:
            pass  # utilization is optional; the average falls back to 0.0

        # Sleep only until the next whole-second boundary since the start.
        remaining = started + tick + 1 - time.monotonic()
        if remaining > 0:
            time.sleep(remaining)

    return power_samples, util_samples


def _kwh_per_million_tokens(avg_power_w: float, tokens_per_second: float) -> float:
    """Return the energy, in kWh, needed to produce one million tokens.

    Energy per token is ``P / rate`` joules, so one million tokens cost
    ``P / rate * 1e6`` J; dividing by 3.6e6 J/kWh gives ``P / (rate * 3.6)``.
    Example: 300 W at 1500 tok/s -> 300 / 5400 = 0.0556 kWh per 1M tokens
    (1M tokens take about 667 s, i.e. 0.185 h at 0.3 kW).
    """
    return avg_power_w / (tokens_per_second * 3.6)


def run_benchmark(args: argparse.Namespace) -> int:
    """Sample one GPU, print the energy report and optionally upload it.

    Args:
        args: Parsed CLI namespace providing ``gpu``, ``duration``,
            ``upload``, ``model_tag``, ``throughput`` and ``framework``.

    Returns:
        0 when a report was produced; 1 when pynvml is missing, NVML cannot
        be initialised or queried, the GPU index or duration is invalid, or
        no power reading could be collected.
    """
    if pynvml is None:
        print(
            "Error: pynvml not installed. Install with: pip install nvidia-ml-py",
            file=sys.stderr,
        )
        return 1

    duration = args.duration
    if duration <= 0:
        print(
            f"Error: --duration must be a positive number of seconds (got {duration}).",
            file=sys.stderr,
        )
        return 1

    try:
        pynvml.nvmlInit()
    except pynvml.NVMLError as e:
        print(f"Error: NVML init failed — {e}", file=sys.stderr)
        return 1

    try:
        try:
            device_count = pynvml.nvmlDeviceGetCount()
            # Reject negative indices too, not just indices past the end.
            if not 0 <= args.gpu < device_count:
                print(
                    f"Error: GPU index {args.gpu} not found (system has {device_count} GPU(s)).",
                    file=sys.stderr,
                )
                return 1

            handle = pynvml.nvmlDeviceGetHandleByIndex(args.gpu)
            gpu_name = pynvml.nvmlDeviceGetName(handle)
        except pynvml.NVMLError as e:
            print(f"Error: NVML device query failed — {e}", file=sys.stderr)
            return 1

        # Some pynvml versions return the device name as bytes.
        if isinstance(gpu_name, bytes):
            gpu_name = gpu_name.decode()

        print(
            f"Sampling GPU {args.gpu} ({gpu_name}) for {duration} s …",
            flush=True,
        )

        power_samples, util_samples = _sample_gpu(handle, duration)

        if not power_samples:
            print("Error: No power readings collected.", file=sys.stderr)
            return 1

        avg_power_w = sum(power_samples) / len(power_samples)
        avg_util_pct = (
            sum(util_samples) / len(util_samples) if util_samples else 0.0
        )

        # J/GPU-hr = avg_power_W × 3600 s/hr
        j_per_gpu_hour = avg_power_w * 3600.0
        kwh_per_gpu_hour = j_per_gpu_hour / 3_600_000.0

        # J/TFLOP estimate (only if GPU wasn't idle during the run)
        j_per_tflop: Optional[float] = None
        if avg_util_pct > 5:
            arch = _resolve_arch(gpu_name)
            # The spec object comes from an optional module; do not assume
            # it carries a usable fp16_tflops figure.
            peak_tflops = getattr(arch, "fp16_tflops", None)
            if isinstance(peak_tflops, (int, float)) and peak_tflops > 0:
                effective_tflops = peak_tflops * (avg_util_pct / 100.0)
                j_per_tflop = avg_power_w / effective_tflops

        # kWh per 1M tokens (inference efficiency); needs a positive throughput.
        kwh_per_1m_tokens: Optional[float] = None
        throughput = args.throughput
        if throughput is not None and throughput > 0:
            kwh_per_1m_tokens = round(
                _kwh_per_million_tokens(avg_power_w, throughput), 6
            )

        report = {
            "gpu_index": args.gpu,
            "gpu_name": gpu_name,
            "duration_s": duration,
            "avg_power_w": round(avg_power_w, 2),
            "avg_util_pct": round(avg_util_pct, 1),
            "j_per_gpu_hour": round(j_per_gpu_hour, 2),
            "kwh_per_gpu_hour": round(kwh_per_gpu_hour, 6),
            "j_per_tflop": round(j_per_tflop, 4) if j_per_tflop is not None else None,
            "throughput_tokens_s": args.throughput,
            "framework": args.framework,
            "kwh_per_1m_tokens": kwh_per_1m_tokens,
        }

        if _rich:
            _print_rich(report)
        else:
            _print_plain(report)

        if args.upload:
            _upload(report, args.model_tag)
        else:
            print(
                "To submit to the Green AI Index:\n"
                " aluminatiai benchmark --throughput <TOKENS/S>"
                " --model-tag <MODEL> --upload"
            )

    finally:
        # Always release NVML, including on the early-return error paths.
        try:
            pynvml.nvmlShutdown()
        except Exception:
            pass

    return 0
| 257 | + |
| 258 | + |
def _upload(report: dict, model_tag: str) -> None:
    """POST benchmark result to /api/benchmarks/hardware.

    The API key is read from ``ALUMINATAI_API_KEY``. The target URL is derived
    from ``ALUMINATAI_API_ENDPOINT`` by stripping a trailing
    ``/v1/metrics/ingest`` and appending ``/api/benchmarks/hardware``.

    Every failure (missing key, malformed or non-HTTP(S) endpoint, network or
    HTTP error) is reported on stderr and never raised, so a failed upload
    cannot discard a report that has already been printed.

    Args:
        report: Report dict built by ``run_benchmark``.
        model_tag: Model label to attach; ``"unknown"`` is sent when empty.
    """
    import os
    from urllib.parse import urlsplit

    api_key = os.getenv("ALUMINATAI_API_KEY", "").strip()
    if not api_key:
        # Without a key the request would go out with an empty X-API-Key.
        print("Upload failed: ALUMINATAI_API_KEY is not set.", file=sys.stderr)
        return

    endpoint = os.getenv(
        "ALUMINATAI_API_ENDPOINT",
        "https://aluminatiai.com/v1/metrics/ingest",
    )
    # Derive base URL from ingest endpoint
    ingest_suffix = "/v1/metrics/ingest"
    base = endpoint.rstrip("/")
    if base.endswith(ingest_suffix):
        base = base[: -len(ingest_suffix)]

    hardware_url = f"{base}/api/benchmarks/hardware"

    # The endpoint comes from the environment: refuse anything that is not
    # plain HTTP(S) (urlopen would otherwise also accept e.g. file://).
    try:
        scheme = urlsplit(hardware_url).scheme
    except ValueError:
        scheme = ""
    if scheme not in ("http", "https"):
        print(
            f"Upload failed: unsupported endpoint URL {hardware_url!r}",
            file=sys.stderr,
        )
        return

    payload = {
        "gpu_arch": report["gpu_name"],
        "model_tag": model_tag or "unknown",
        "avg_power_w": report["avg_power_w"],
        "energy_j_per_gpu_hour": report["j_per_gpu_hour"],
        "duration_seconds": report["duration_s"],
        "gpu_count": 1,
        "precision_tag": "unknown",
        "tokens_per_second": report.get("throughput_tokens_s"),
        "framework_tag": report.get("framework", "unknown"),
        "kwh_per_1m_tokens": report.get("kwh_per_1m_tokens"),
    }

    try:
        # Request construction stays inside the try: it can itself raise
        # ValueError for a malformed URL or header value.
        req = urllib.request.Request(
            hardware_url,
            data=json.dumps(payload).encode(),
            headers={
                "Content-Type": "application/json",
                "X-API-Key": api_key,
            },
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=15) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        print(f"Upload failed: HTTP {e.code}", file=sys.stderr)
        return
    except Exception as e:
        print(f"Upload failed: {e}", file=sys.stderr)
        return

    # The upload succeeded at this point; a non-JSON body must not be
    # reported as a failure, so fall back to the raw response text.
    try:
        body = json.loads(raw)
    except ValueError:
        body = raw.decode("utf-8", errors="replace").strip()
    print(f"Uploaded: {body}")
0 commit comments