diff --git a/.gitignore b/.gitignore
index 37d74f4f..6c6ea476 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ Thumbs.db
 # Distribution / packaging
 .Python
 build/
+!src/madengine/configs/build/
 develop-eggs/
 dist/
 downloads/
@@ -144,4 +145,4 @@ rocm_trace_lite_output/
 slurm_results/
 MagicMock/
 .madengine_session_start
-run_directory/
\ No newline at end of file
+run_directory/
diff --git a/README.md b/README.md
index ea3fbbb5..f66dda03 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ madengine is a modern CLI tool for running Large Language Models (LLMs) and Deep
 - [Performance Profiling](#-performance-profiling)
 - [Reporting and Database](#-reporting-and-database)
 - [Installation](#-installation)
+- [YAML Configuration (`--config`)](#-yaml-configuration---config)
 - [Tips & Best Practices](#-tips--best-practices)
   - [Log error pattern scan](#log-error-pattern-scan)
   - [Exit codes and CI](#exit-codes-and-ci)
@@ -39,6 +40,7 @@ madengine is a modern CLI tool for running Large Language Models (LLMs) and Deep
 ## ✨ Key Features
 
 - **🚀 Modern CLI** - Rich terminal output with Typer and Rich
+- **📝 YAML Config** - Composable [Hydra-based YAML configs](#-yaml-configuration---config) with config groups, hardware profiles, and CLI overrides — alternative to `--additional-context` JSON
 - **🎯 Simple Deployment** - Run locally or deploy to Kubernetes/SLURM via configuration
 - **🔧 Distributed Launchers** - Full support for torchrun, DeepSpeed, Megatron-LM, TorchTitan, Primus, vLLM, SGLang
 - **🐳 Container-Native** - Docker-based execution with GPU support (ROCm, CUDA)
@@ -64,12 +66,16 @@ madengine discover --tags dummy
 # Run locally (full workflow: discover/build/run as configured by the model)
 madengine run --tags dummy
 
-# Or with explicit configuration
+# Or with explicit JSON configuration
 madengine run --tags dummy \
   --additional-context '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}'
+
+# Or with YAML config (Hydra-based, composable)
+madengine run --tags dummy --config scheduler=slurm --config launcher=torchrun
+madengine run --config my_job.yaml
 ```
 
-> **Note**: For build operations, `gpu_vendor` defaults to `AMD` and `guest_os` defaults to `UBUNTU` if not specified. For production deployments or non-AMD/Ubuntu environments, explicitly specify these values.
+> **Note**: `--config` is mutually exclusive with `--additional-context` / `--additional-context-file`. For build operations, `gpu_vendor` defaults to `AMD` and `guest_os` defaults to `UBUNTU` if not specified.
 
 If auto-detection does not find your **host** ROCm root, set top-level `MAD_ROCM_PATH` in `--additional-context`. For a different ROCm root **inside the container**, set `docker_env_vars.MAD_ROCM_PATH` in additional context. If you omit it, madengine derives in-container `ROCM_PATH` when running Docker (from the image's baked-in env, then an in-container probe, then `/opt/rocm` — it does **not** copy the host path). You can also set `ROCM_PATH` / `MAD_AUTO_ROCM_PATH=0` for **host** behavior as documented in [docs/configuration.md](docs/configuration.md):
 
@@ -127,7 +133,7 @@ For detailed command options, see the **[CLI Command Reference](docs/cli-referen
 | [Usage Guide](docs/usage.md) | Commands, workflows, and examples ([`--skip-model-run`](docs/usage.md#skip-model-run-after-build)) |
 | **[CLI Reference](docs/cli-reference.md)** | **Detailed command options and examples** |
 | [Deployment](docs/deployment.md) | Kubernetes and SLURM deployment |
-| [Configuration](docs/configuration.md) | Advanced options; [run log error pattern scan](docs/configuration.md#run-phase-log-error-pattern-scan) |
+| [Configuration](docs/configuration.md) | Advanced options; [YAML config (`--config`)](docs/configuration.md#yaml-configuration---config); [run log error pattern scan](docs/configuration.md#run-phase-log-error-pattern-scan) |
 | [Batch Build](docs/batch-build.md) | Selective builds for CI/CD |
 | [Launchers](docs/launchers.md) | Distributed training frameworks |
 | [Profiling](docs/profiling.md) | Performance analysis tools |
@@ -565,6 +571,115 @@ cd madengine && pip install -e ".[dev]"
 
 See [Installation Guide](docs/installation.md) for detailed instructions.
 
+## 📝 YAML Configuration (`--config`)
+
+The `--config` flag provides a composable, Hydra-based YAML alternative to `--additional-context` JSON strings. It is available on both `run` and `build` commands.
+
+> **Note**: `--config` is **mutually exclusive** with `--additional-context` and `--additional-context-file`. Using them together produces an error.
+
+### Basic Usage
+
+```bash
+# Use a config group override
+madengine run --tags dummy --config scheduler=slurm
+
+# Combine multiple overrides
+madengine run --tags dummy \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config distributed.nnodes=4
+
+# Use a user YAML file
+madengine run --config my_job.yaml
+
+# User YAML file with overrides
+madengine run --config my_job.yaml --config distributed.nnodes=8
+
+# Append optional config groups with '+' prefix
+madengine run --tags dummy \
+  --config +profile=mi300x_8gpu \
+  --config +env=nccl_debug \
+  --config +tools=rocprofv3_lightweight
+```
+
+### Config Groups
+
+madengine ships with pre-built config groups that compose together:
+
+| Group | Default | Options | Description |
+|-------|---------|---------|-------------|
+| `platform` | `docker` | docker, bare_metal, singularity, podman | Execution platform |
+| `scheduler` | `local` | local, slurm, k8s | Job scheduler |
+| `hardware` | `amd` | amd, nvidia, cpu | GPU vendor and runtime settings |
+| `launcher` | `none` | none, torchrun, deepspeed, megatron, torchtitan, vllm, sglang, sglang_disagg, primus, native | Distributed launcher |
+| `+profile` | *(none)* | mi300x_8gpu, mi300x_single, mi250x_4gpu, h100_8gpu, a100_8gpu | Hardware profiles (append-only) |
+| `+env` | *(none)* | nccl_debug, nccl_tuned, infiniband, miopen_defaults | Environment presets (append-only) |
+| `+tools` | *(none)* | rocprofv3_lightweight, rocprofv3_comprehensive, power_profiler, vram_profiler, rocm_trace_lite | Profiling tools (append-only) |
+| `+data` | *(none)* | local, s3, minio, nas | Data source config (append-only) |
+| `+build` | *(none)* | default, ci, multi_arch | Build presets (append-only) |
+
+Groups with `+` prefix are append-only — they are not loaded by default and must be explicitly added.
+
+### User YAML Files
+
+Create a YAML file for your job and pass it via `--config`:
+
+```yaml
+# my_job.yaml
+model:
+  tags: [dummy]
+  timeout: 3600
+
+debug: true
+
+env_vars:
+  MY_VAR: test_value
+  NCCL_DEBUG: INFO
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 4
+
+slurm:
+  partition: gpu
+  time: "02:00:00"
+```
+
+```bash
+madengine run --config my_job.yaml
+```
+
+User YAML values are merged on top of the base config and config group selections; only inline `key=value` overrides passed via `--config` take priority over them.
+
+### Examples
+
+```bash
+# SLURM multi-node with torchrun
+madengine run --tags model \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config distributed.nnodes=4
+
+# MI300x 8-GPU profile with NCCL debug
+madengine run --tags model \
+  --config +profile=mi300x_8gpu \
+  --config +env=nccl_debug
+
+# NVIDIA hardware with profiling
+madengine run --tags model \
+  --config hardware=nvidia \
+  --config +tools=rocprofv3_lightweight
+
+# Build with CI preset
+madengine build --tags model \
+  --config +build=ci \
+  --registry docker.io/myorg
+```
+
+See [Configuration Guide](docs/configuration.md#yaml-configuration---config) for full details, and [`examples/configs/`](examples/configs/) for annotated templates and ready-to-run demo files.
+
 ## 💡 Tips & Best Practices
 
 ### General Usage
diff --git a/docs/cli-reference.md b/docs/cli-reference.md
index 9340ae1c..e1ae31f6 100644
--- a/docs/cli-reference.md
+++ b/docs/cli-reference.md
@@ -100,6 +100,7 @@ madengine build [OPTIONS]
 | `--batch-manifest` | | TEXT | `None` | Input batch.json file for batch build mode |
 | `--additional-context` | `-c` | TEXT | `"{}"` | Additional context as JSON string |
 | `--additional-context-file` | `-f` | TEXT | `None` | File containing additional context JSON |
+| `--config` | | TEXT | `None` | YAML config file and/or Hydra overrides (repeatable). Mutually exclusive with `--additional-context` / `--additional-context-file`. See [Configuration — YAML config](configuration.md#yaml-configuration---config). |
 | `--clean-docker-cache` | | FLAG | `False` | Rebuild images without using cache |
 | `--manifest-output` | `-m` | TEXT | `build_manifest.json` | Output file for build manifest |
 | `--summary-output` | `-s` | TEXT | `None` | Output file for build summary JSON |
@@ -142,6 +143,12 @@ madengine build --tags model \
 
 # Real-time output with verbose logging
 madengine build --tags model --live-output --verbose
+
+# Build with YAML config (mutually exclusive with --additional-context)
+madengine build --tags model --config +build=ci --registry docker.io/myorg
+
+# Build with user YAML file
+madengine build --config my_build.yaml --registry docker.io/myorg
 ```
 
 **Default Values:**
@@ -215,6 +222,7 @@ madengine run [OPTIONS]
 | `--timeout` | | INT | `-1` | Timeout in seconds (-1=default 7200s, 0=no timeout) |
 | `--additional-context` | `-c` | TEXT | `"{}"` | Additional context as JSON string |
 | `--additional-context-file` | `-f` | TEXT | `None` | File containing additional context JSON |
+| `--config` | | TEXT | `None` | YAML config file and/or Hydra overrides (repeatable). Mutually exclusive with `--additional-context` / `--additional-context-file`. See [Configuration — YAML config](configuration.md#yaml-configuration---config). |
 | `--keep-alive` | | FLAG | `False` | Keep Docker containers alive after run |
 | `--keep-model-dir` | | FLAG | `False` | Keep model directory after run |
 | `--clean-docker-cache` | | FLAG | `False` | Rebuild images without using cache (full workflow) |
@@ -326,9 +334,23 @@ madengine run --tags model --output my_perf_results.csv
 # Clean up intermediate perf files after run
 madengine run --tags model --cleanup-perf
 
-# Using configuration file
+# Using JSON configuration file
 madengine run --tags model \
   --additional-context-file k8s-config.json
+
+# Using YAML config (mutually exclusive with --additional-context)
+madengine run --tags model \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config distributed.nnodes=4
+
+# YAML config with hardware profile
+madengine run --tags model \
+  --config +profile=mi300x_8gpu \
+  --config +env=nccl_debug
+
+# User YAML file with overrides
+madengine run --config my_job.yaml --config distributed.nnodes=8
 ```
 
 **Execution Modes:**
@@ -601,6 +623,23 @@ For complex configurations, use JSON files with `--additional-context-file`:
 
 To run on specific nodes, add `"nodelist": "node01,node02"` to the `slurm` section. When set, the job runs only on those nodes and node health preflight is skipped. See [examples/slurm-configs/basic/03-multi-node-basic-nodelist.json](../examples/slurm-configs/basic/03-multi-node-basic-nodelist.json).
 
+### YAML Configuration (`--config`)
+
+As an alternative to JSON, use `--config` with composable Hydra-based YAML:
+
+```bash
+# Config group overrides
+madengine run --tags model --config scheduler=slurm --config launcher=torchrun
+
+# User YAML file
+madengine run --config my_job.yaml
+
+# Append-only groups (profiles, tools, env presets)
+madengine run --tags model --config +profile=mi300x_8gpu --config +env=nccl_debug
+```
+
+`--config` is **mutually exclusive** with `--additional-context` / `--additional-context-file`. See [Configuration Guide — YAML Configuration](configuration.md#yaml-configuration---config) for config groups, user YAML format, and full examples.
+
 ### Run phase: log error pattern scan (optional)
 
 These keys apply to **local Docker runs** when madengine post-processes the run log. Use them when substring matches cause false `FAILURE` status (for example benign `RuntimeError:` lines). Full details: [Configuration — Run phase: log error pattern scan](configuration.md#run-phase-log-error-pattern-scan).
diff --git a/docs/configuration.md b/docs/configuration.md
index 034ac6d8..4409ca97 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -11,7 +11,7 @@ madengine run --tags model \
   --additional-context '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}'
 ```
 
-### 2. Configuration File
+### 2. JSON Configuration File
 
 ```bash
 madengine run --tags model --additional-context-file config.json
@@ -26,6 +26,163 @@ madengine run --tags model --additional-context-file config.json
 }
 ```
 
+### 3. YAML Configuration (`--config`)
+
+```bash
+madengine run --tags model --config scheduler=slurm --config launcher=torchrun
+madengine run --config my_job.yaml
+```
+
+> **Mutual exclusion**: `--config` cannot be combined with `--additional-context` or `--additional-context-file`. Using both produces an error.
+
+See [YAML Configuration](#yaml-configuration---config) below for full details.
+
+## YAML Configuration (`--config`)
+
+The `--config` flag provides composable, Hydra-based YAML configuration as an alternative to JSON strings. It is available on both `run` and `build` commands.
+
+### How It Works
+
+1. madengine loads a base `config.yaml` with sensible defaults (AMD hardware, Docker platform, local scheduler)
+2. **Config group overrides** (e.g., `scheduler=slurm`) swap in pre-built YAML fragments
+3. **Inline overrides** (e.g., `distributed.nnodes=4`) set individual values
+4. **User YAML files** (e.g., `my_job.yaml`) merge on top of the composed config; only inline overrides take priority over them (see [Priority Order](#priority-order))
+
+All four can be combined in a single command:
+
+```bash
+madengine run --config my_job.yaml \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config distributed.nnodes=4
+```
+
+### Config Groups
+
+madengine ships with pre-built config groups under `src/madengine/configs/`:
+
+#### Default Groups (swapped via `group=option`)
+
+| Group | Default | Options | Description |
+|-------|---------|---------|-------------|
+| `platform` | `docker` | `docker`, `bare_metal`, `singularity`, `podman` | Execution platform |
+| `scheduler` | `local` | `local`, `slurm`, `k8s` | Job scheduler — `slurm` and `k8s` add their respective config sections |
+| `hardware` | `amd` | `amd`, `nvidia`, `cpu` | Sets `gpu_vendor`, `guest_os`, runtime device config |
+| `launcher` | `none` | `none`, `torchrun`, `deepspeed`, `megatron`, `torchtitan`, `vllm`, `sglang`, `sglang_disagg`, `primus`, `native` | Distributed launcher — sets `distributed.enabled`, `distributed.launcher`, and launcher-specific defaults |
+
+#### Append-Only Groups (added via `+group=option`)
+
+These are not loaded by default. Use the `+` prefix to add them:
+
+| Group | Options | Description |
+|-------|---------|-------------|
+| `+profile` | `mi300x_8gpu`, `mi300x_single`, `mi250x_4gpu`, `h100_8gpu`, `a100_8gpu` | Hardware profiles — sets GPU type, environment variables, distributed settings |
+| `+env` | `nccl_debug`, `nccl_tuned`, `infiniband`, `miopen_defaults` | Environment variable presets |
+| `+tools` | `rocprofv3_lightweight`, `rocprofv3_comprehensive`, `power_profiler`, `vram_profiler`, `rocm_trace_lite` | Profiling tool presets |
+| `+data` | `local`, `s3`, `minio`, `nas` | Data source configuration |
+| `+build` | `default`, `ci`, `multi_arch` | Build presets for CI or multi-arch builds |
+
+### User YAML Files
+
+Create a job-specific YAML file and pass it via `--config`:
+
+```yaml
+# my_slurm_job.yaml
+model:
+  tags: [my_model]
+  timeout: 3600
+
+debug: true
+
+env_vars:
+  MY_VAR: test_value
+  NCCL_DEBUG: INFO
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 4
+
+slurm:
+  partition: gpu
+  time: "02:00:00"
+```
+
+```bash
+madengine run --config my_slurm_job.yaml
+```
+
+User YAML values merge on top of the base config and any config group selections. You can also combine a user file with overrides:
+
+```bash
+madengine run --config my_slurm_job.yaml --config distributed.nnodes=8
+```
+
+### Priority Order
+
+1. **Inline overrides** (`key=value`) — highest
+2. **User YAML file** — merged on top of composed config
+3. **Config group selections** (`scheduler=slurm`)
+4. **Base config defaults** — lowest
+
+### Examples
+
+```bash
+# Local run with defaults (AMD, Docker, no distribution)
+madengine run --tags dummy --config scheduler=local
+
+# SLURM multi-node training
+madengine run --tags model \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config distributed.nnodes=4
+
+# MI300x 8-GPU profile with NCCL debug
+madengine run --tags model \
+  --config +profile=mi300x_8gpu \
+  --config +env=nccl_debug
+
+# NVIDIA hardware
+madengine run --tags model --config hardware=nvidia
+
+# Kubernetes with vLLM inference
+madengine run --tags model \
+  --config scheduler=k8s \
+  --config launcher=vllm \
+  --config distributed.nnodes=2
+
+# Build with CI preset
+madengine build --tags model \
+  --config +build=ci \
+  --registry docker.io/myorg
+
+# User YAML with profiling
+madengine run --config my_job.yaml \
+  --config +tools=rocprofv3_lightweight
+```
+
+### Metadata from Config
+
+When using `--config`, certain YAML keys are extracted as metadata rather than passed to the internal context:
+
+- `model.tags` — used as `--tags` if not specified on the CLI
+- `model.timeout` — used as `--timeout` if not specified
+- `model.container_image` — promoted to `MAD_CONTAINER_IMAGE` in context
+- `build.registry` — used as `--registry` if not specified
+- `build.target_archs` — used as `--target-archs` if not specified
+- `platform`, `output`, `summary_output`, `data_config`, `live_output` — extracted to metadata
+
+### Validation
+
+madengine validates the composed config and reports errors for:
+
+- Conflicting scheduler selections (e.g., both `slurm` and `k8s` sections present)
+- `distributed.enabled: true` without a `distributed.launcher`
+- Invalid `distributed.nnodes` (must be a positive integer)
+- Unsupported `platform.type` (currently only `docker` is supported)
+- Unknown top-level config keys (catches typos)
+
 ## Default Configuration Values
 
 madengine provides sensible defaults for common AMD/Ubuntu workflows:
@@ -389,6 +546,8 @@ Automatically applies (see presets under `src/madengine/deployment/presets/k8s/`
 }
 ```
 
+See [`examples/configs/templates/k8s.yaml`](../examples/configs/templates/k8s.yaml) for the complete annotated YAML template, or [`examples/configs/demo/k8s/`](../examples/configs/demo/k8s/) for ready-to-run examples.
+
 ## SLURM Deployment
 
 ### Basic Configuration
@@ -414,10 +573,7 @@ Automatically applies (see presets under `src/madengine/deployment/presets/k8s/`
     "gpus_per_node": 8,
     "nodes": 2,
     "nodelist": "node01,node02",
-    "time": "24:00:00",
-    "mem": "64G",
-    "mail_user": "user@example.com",
-    "mail_type": "ALL"
+    "time": "24:00:00"
   }
 }
 ```
@@ -428,13 +584,16 @@ Automatically applies (see presets under `src/madengine/deployment/presets/k8s/`
 - `partition` - SLURM partition name (required)
 - `account` - Billing account
 - `qos` - Quality of Service
-- `gpus_per_node` - GPUs per node (default: 1)
+- `gpus_per_node` - GPUs per node (default: 8)
 - `nodes` - Number of nodes (default: 1)
 - `nodelist` - Comma-separated node names to run on (e.g. `"node01,node02"`); when set, job is restricted to these nodes and automatic node health preflight is skipped
-- `time` - Wall time limit HH:MM:SS (required)
-- `mem` - Memory per node (e.g., "64G")
-- `mail_user` - Email for notifications
-- `mail_type` - Notification types (BEGIN, END, FAIL, ALL)
+- `exclude` - Comma-separated node names to exclude
+- `constraint` - Node feature constraint (e.g., `"infiniband"`)
+- `time` - Wall time limit HH:MM:SS (default: `"24:00:00"`)
+- `exclusive` - Request exclusive node access (default: `true`)
+- `modules` - List of environment modules to load
+- `network_interface` - Network interface for NCCL/GLOO (e.g., `"ib0"`)
+- `shared_workspace` - Explicit NFS/Lustre shared workspace path
 
 ### Multi-Node SLURM
 
@@ -454,6 +613,8 @@ Automatically applies (see presets under `src/madengine/deployment/presets/k8s/`
 }
 ```
 
+See [`examples/configs/templates/slurm.yaml`](../examples/configs/templates/slurm.yaml) for the complete annotated YAML template, or [`examples/configs/demo/slurm/`](../examples/configs/demo/slurm/) for ready-to-run examples.
+
 ## Distributed Training
 
 ### Launcher Configuration
@@ -469,6 +630,8 @@ Automatically applies (see presets under `src/madengine/deployment/presets/k8s/`
 }
 ```
 
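+> **YAML config note**: When using `--config`, selecting a launcher config group (e.g., `--config launcher=torchrun`) sets `distributed.enabled: true` for you. If you instead set `distributed.launcher` directly (in a user YAML file or as an inline override), you must also set `distributed.enabled: true` explicitly: the default config loads `launcher: none`, which sets `enabled: false`, and setting the `distributed.launcher` key alone does not override it.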
+
 **Launcher Options:**
 - `launcher` - Framework name (required)
 - `nnodes` - Number of nodes
diff --git a/docs/superpowers/plans/2026-05-02-config-driven-yaml.md b/docs/superpowers/plans/2026-05-02-config-driven-yaml.md
new file mode 100644
index 00000000..01455ba4
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-02-config-driven-yaml.md
@@ -0,0 +1,1989 @@
+# Config-Driven YAML System Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add a `--config` CLI argument to madengine that loads Hydra-based YAML configs with composable config groups and CLI override support, backward-compatible with `--additional-context`.
+
+**Architecture:** A new `src/madengine/config/` package uses Hydra's Compose API (not `@hydra.main`) to load YAML config groups from `src/madengine/configs/`, then a `ConfigTranslator` maps clean YAML keys to the internal `additional_context` dict format that existing orchestrators expect. The `--config` arg is added to the `run` and `build` Typer commands; `--additional-context` still works and takes highest priority.
+
+**Tech Stack:** hydra-core>=1.3, omegaconf>=2.3 (new deps); Typer (existing CLI); pytest (tests)
+
+---
+
+## File Map
+
+### New Files — Config Package
+
+| File | Responsibility |
+|------|---------------|
+| `src/madengine/config/__init__.py` | Public API: `load_config(config_args) -> (dict, dict)` |
+| `src/madengine/config/loader.py` | `HydraConfigLoader` — Compose API wrapper, separates file path from overrides |
+| `src/madengine/config/translator.py` | `ConfigTranslator` — maps YAML keys to `additional_context` format |
+| `src/madengine/config/schema.py` | `ConfigValidator` — cross-field checks, unknown key detection |
+
+### New Files — YAML Configs
+
+| Directory | Files |
+|-----------|-------|
+| `src/madengine/configs/` | `config.yaml` (root) |
+| `src/madengine/configs/platform/` | `docker.yaml`, `bare_metal.yaml`, `singularity.yaml`, `podman.yaml` |
+| `src/madengine/configs/scheduler/` | `local.yaml`, `slurm.yaml`, `k8s.yaml` |
+| `src/madengine/configs/hardware/` | `amd.yaml`, `nvidia.yaml`, `cpu.yaml` |
+| `src/madengine/configs/launcher/` | `none.yaml`, `torchrun.yaml`, `deepspeed.yaml`, `megatron.yaml`, `vllm.yaml`, `sglang.yaml`, `sglang_disagg.yaml`, `torchtitan.yaml`, `primus.yaml`, `native.yaml` |
+| `src/madengine/configs/profile/` | `mi300x_8gpu.yaml`, `mi300x_single.yaml`, `mi250x_4gpu.yaml`, `h100_8gpu.yaml`, `a100_8gpu.yaml` |
+| `src/madengine/configs/env/` | `nccl_debug.yaml`, `nccl_tuned.yaml`, `infiniband.yaml`, `miopen_defaults.yaml` |
+| `src/madengine/configs/tools/` | `rocprofv3_lightweight.yaml`, `rocprofv3_comprehensive.yaml`, `power_profiler.yaml`, `vram_profiler.yaml`, `rocm_trace_lite.yaml` |
+| `src/madengine/configs/data/` | `local.yaml`, `s3.yaml`, `minio.yaml`, `nas.yaml` |
+| `src/madengine/configs/build/` | `default.yaml`, `ci.yaml`, `multi_arch.yaml` |
+
+### New Files — Tests
+
+| File | Responsibility |
+|------|---------------|
+| `tests/unit/test_hydra_config_loader.py` | HydraConfigLoader unit tests |
+| `tests/unit/test_config_translator.py` | ConfigTranslator unit tests |
+| `tests/unit/test_config_schema.py` | ConfigValidator unit tests |
+| `tests/unit/test_config_integration.py` | End-to-end: `load_config()` → dict |
+| `tests/fixtures/configs/` | Test YAML fixtures |
+
+### Modified Files
+
+| File | Change |
+|------|--------|
+| `pyproject.toml` | Add `hydra-core>=1.3`, `omegaconf>=2.3` to dependencies; add `configs` to wheel force-include |
+| `src/madengine/cli/commands/run.py` | Add `--config` parameter, config loading + merge logic |
+| `src/madengine/cli/commands/build.py` | Add `--config` parameter, config loading + merge logic |
+
+---
+
+### Task 1: Add Dependencies and Wheel Config
+
+**Files:**
+- Modify: `pyproject.toml`
+
+- [ ] **Step 1: Add hydra-core and omegaconf to dependencies**
+
+In `pyproject.toml`, add to the `dependencies` list after `"pyyaml>=6.0"`:
+
+```toml
+dependencies = [
+  "pandas",
+  "GitPython",
+  "jsondiff",
+  "sqlalchemy",
+  "paramiko",
+  "tqdm",
+  "typing-extensions",
+  "pymongo",
+  "toml",
+  "typer>=0.9.0",
+  "rich>=13.0.0",
+  "click>=8.0.0",
+  "jinja2>=3.0.0",
+  "pyyaml>=6.0",
+  "hydra-core>=1.3",
+  "omegaconf>=2.3",
+]
+```
+
+- [ ] **Step 2: Add configs directory to wheel force-include**
+
+In the `[tool.hatch.build.targets.wheel.force-include]` section, add:
+
+```toml
+[tool.hatch.build.targets.wheel.force-include]
+"src/madengine/scripts" = "madengine/scripts"
+"src/madengine/deployment/templates" = "madengine/deployment/templates"
+"src/madengine/configs" = "madengine/configs"
+```
+
+- [ ] **Step 3: Install updated dependencies**
+
+Run: `pip install -e ".[dev]"`
+Expected: Clean install with hydra-core and omegaconf resolved.
+
+- [ ] **Step 4: Verify imports work**
+
+Run: `python -c "from hydra import compose, initialize_config_dir; from omegaconf import OmegaConf, DictConfig; print('OK')"`
+Expected: Prints `OK`.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add pyproject.toml
+git commit -m "feat(config): add hydra-core and omegaconf dependencies"
+```
+
+---
+
+### Task 2: Create YAML Config Files — Root and Default Groups
+
+**Files:**
+- Create: `src/madengine/configs/config.yaml`
+- Create: `src/madengine/configs/platform/docker.yaml`
+- Create: `src/madengine/configs/platform/bare_metal.yaml`
+- Create: `src/madengine/configs/platform/singularity.yaml`
+- Create: `src/madengine/configs/platform/podman.yaml`
+- Create: `src/madengine/configs/scheduler/local.yaml`
+- Create: `src/madengine/configs/scheduler/slurm.yaml`
+- Create: `src/madengine/configs/scheduler/k8s.yaml`
+- Create: `src/madengine/configs/hardware/amd.yaml`
+- Create: `src/madengine/configs/hardware/nvidia.yaml`
+- Create: `src/madengine/configs/hardware/cpu.yaml`
+- Create: `src/madengine/configs/launcher/none.yaml`
+- Create: `src/madengine/configs/launcher/torchrun.yaml`
+- Create: `src/madengine/configs/launcher/deepspeed.yaml`
+- Create: `src/madengine/configs/launcher/megatron.yaml`
+- Create: `src/madengine/configs/launcher/vllm.yaml`
+- Create: `src/madengine/configs/launcher/sglang.yaml`
+- Create: `src/madengine/configs/launcher/sglang_disagg.yaml`
+- Create: `src/madengine/configs/launcher/torchtitan.yaml`
+- Create: `src/madengine/configs/launcher/primus.yaml`
+- Create: `src/madengine/configs/launcher/native.yaml`
+
+- [ ] **Step 1: Create directory structure**
+
+```bash
+mkdir -p src/madengine/configs/{platform,scheduler,hardware,launcher,profile,env,tools,data,build}
+```
+
+- [ ] **Step 2: Create root config.yaml**
+
+Write to `src/madengine/configs/config.yaml`:
+
+```yaml
+defaults:
+  - platform: docker
+  - scheduler: local
+  - hardware: amd
+  - launcher: none
+  - _self_
+
+model:
+  tags: []
+  manifest_file: null
+  container_image: null
+  skip_run: false
+  timeout: null
+
+docker:
+  build_args: {}
+  env_vars: {}
+  mounts: {}
+  gpus: null
+  cpus: null
+  additional_run_options: null
+  keep_alive: false
+  clean_cache: false
+
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
+
+env_vars: {}
+
+debug: false
+live_output: false
+
+log_error:
+  pattern_scan: true
+  benign_patterns: []
+  patterns: []
+
+tools: []
+pre_scripts: []
+post_scripts: []
+encapsulate_script: null
+
+data_config: data.json
+
+output: perf.csv
+summary_output: null
+```
+
+- [ ] **Step 3: Create platform configs**
+
+Write to `src/madengine/configs/platform/docker.yaml`:
+
+```yaml
+# @package _global_
+platform:
+  type: docker
+```
+
+Write to `src/madengine/configs/platform/bare_metal.yaml`:
+
+```yaml
+# @package _global_
+platform:
+  type: bare_metal
+```
+
+Write to `src/madengine/configs/platform/singularity.yaml`:
+
+```yaml
+# @package _global_
+platform:
+  type: singularity
+```
+
+Write to `src/madengine/configs/platform/podman.yaml`:
+
+```yaml
+# @package _global_
+platform:
+  type: podman
+```
+
+- [ ] **Step 4: Create scheduler configs**
+
+Write to `src/madengine/configs/scheduler/local.yaml`:
+
+```yaml
+# @package _global_
+```
+
+Write to `src/madengine/configs/scheduler/slurm.yaml`:
+
+```yaml
+# @package _global_
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 8
+  time: "24:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  modules: []
+  account: null
+  qos: null
+  constraint: null
+  nodelist: null
+  exclude: null
+  results_dir: null
+  shared_workspace: null
+  network_interface: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  MIOPEN_FIND_MODE: "1"
+```
+
+Write to `src/madengine/configs/scheduler/k8s.yaml`:
+
+```yaml
+# @package _global_
+k8s:
+  kubeconfig: ~/.kube/config
+  namespace: default
+  image_pull_policy: Always
+  backoff_limit: 3
+  ttl_seconds_after_finished: null
+  allow_privileged_profiling: null
+  gpu_count: null
+  gpu_resource_name: amd.com/gpu
+  memory: null
+  memory_limit: null
+  cpu: null
+  cpu_limit: null
+  host_ipc: true
+  node_selector: {}
+  tolerations: []
+  nfs_storage_class: nfs-banff
+  local_path_storage_class: local-path
+  data_storage_class: nfs-banff
+  recreate_shared_data_pvc: false
+  results_pvc: null
+  data_pvc: null
+  output_dir: null
+  secrets:
+    strategy: from_local_credentials
+    image_pull_secret_names: []
+    runtime_secret_name: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+```
+
+- [ ] **Step 5: Create hardware configs**
+
+Write to `src/madengine/configs/hardware/amd.yaml`:
+
+```yaml
+# @package _global_
+gpu_vendor: AMD
+guest_os: UBUNTU
+
+runtime:
+  devices:
+    - /dev/kfd
+    - /dev/dri
+    - /dev/infiniband
+  capabilities:
+    - SYS_PTRACE
+  security_opts:
+    - seccomp=unconfined
+  network_mode: host
+  ipc: host
+  groups:
+    - video
+  use_gpu_flag: false
+```
+
+Write to `src/madengine/configs/hardware/nvidia.yaml`:
+
+```yaml
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+```
+
+Write to `src/madengine/configs/hardware/cpu.yaml`:
+
+```yaml
+# @package _global_
+gpu_vendor: null
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: null
+  ipc: null
+  groups: []
+  use_gpu_flag: false
+```
+
+- [ ] **Step 6: Create launcher configs**
+
+Write to `src/madengine/configs/launcher/none.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: false
+```
+
+Write to `src/madengine/configs/launcher/torchrun.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+  port: 29500
+```
+
+Write to `src/madengine/configs/launcher/deepspeed.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: deepspeed
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+Write to `src/madengine/configs/launcher/megatron.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+Write to `src/madengine/configs/launcher/vllm.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 1
+  nproc_per_node: 4
+
+vllm:
+  kv_cache_size: 0.7
+  max_model_len: null
+  tensor_parallel_size: null
+```
+
+Write to `src/madengine/configs/launcher/sglang.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: sglang
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  port: 29500
+```
+
+Write to `src/madengine/configs/launcher/sglang_disagg.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: sglang-disagg
+  backend: nccl
+  nnodes: 3
+  nproc_per_node: 8
+  port: 29500
+
+sglang_disagg:
+  prefill_nodes: null
+  decode_nodes: null
+  transfer_backend: mooncake
+```
+
+Write to `src/madengine/configs/launcher/torchtitan.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+Write to `src/madengine/configs/launcher/primus.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: primus
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+Write to `src/madengine/configs/launcher/native.yaml`:
+
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: native
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+- [ ] **Step 7: Verify Hydra can compose the root config**
+
+Run:
+
+```bash
+python -c "
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import OmegaConf
+import os
+GlobalHydra.instance().clear()
+config_dir = os.path.abspath('src/madengine/configs')
+with initialize_config_dir(config_dir=config_dir, version_base=None):
+    cfg = compose(config_name='config')
+print(OmegaConf.to_yaml(cfg))
+"
+```
+
+Expected: Prints the full composed YAML with all default groups merged — `gpu_vendor: AMD`, `distributed.enabled: false`, etc.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add src/madengine/configs/
+git commit -m "feat(config): add root config.yaml and default config groups"
+```
+
+---
+
+### Task 3: Create YAML Config Files — Append-Only Groups
+
+**Files:**
+- Create: `src/madengine/configs/profile/mi300x_8gpu.yaml` (and 4 others)
+- Create: `src/madengine/configs/env/nccl_debug.yaml` (and 3 others)
+- Create: `src/madengine/configs/tools/rocprofv3_lightweight.yaml` (and 4 others)
+- Create: `src/madengine/configs/data/local.yaml` (and 3 others)
+- Create: `src/madengine/configs/build/default.yaml` (and 2 others)
+
+- [ ] **Step 1: Create profile configs**
+
+Write to `src/madengine/configs/profile/mi300x_8gpu.yaml`:
+
+```yaml
+# @package _global_
+gpu_type: mi300x
+gpu_memory_gb: 192
+gpus_per_node: 8
+
+distributed:
+  nproc_per_node: 8
+
+env_vars:
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+```
+
+Write to `src/madengine/configs/profile/mi300x_single.yaml`:
+
+```yaml
+# @package _global_
+gpu_type: mi300x
+gpu_memory_gb: 192
+gpus_per_node: 1
+
+distributed:
+  nproc_per_node: 1
+```
+
+Write to `src/madengine/configs/profile/mi250x_4gpu.yaml`:
+
+```yaml
+# @package _global_
+gpu_type: mi250x
+gpu_memory_gb: 128
+gpus_per_node: 4
+
+distributed:
+  nproc_per_node: 4
+
+env_vars:
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+```
+
+Write to `src/madengine/configs/profile/h100_8gpu.yaml`:
+
+```yaml
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+gpu_type: h100
+gpu_memory_gb: 80
+gpus_per_node: 8
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+
+distributed:
+  nproc_per_node: 8
+```
+
+Write to `src/madengine/configs/profile/a100_8gpu.yaml`:
+
+```yaml
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+gpu_type: a100
+gpu_memory_gb: 80
+gpus_per_node: 8
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+
+distributed:
+  nproc_per_node: 8
+```
+
+- [ ] **Step 2: Create env configs**
+
+Write to `src/madengine/configs/env/nccl_debug.yaml`:
+
+```yaml
+# @package _global_
+env_vars:
+  NCCL_DEBUG: INFO
+  NCCL_DEBUG_SUBSYS: "INIT,NET,GRAPH"
+  TORCH_DISTRIBUTED_DEBUG: DETAIL
+```
+
+Write to `src/madengine/configs/env/nccl_tuned.yaml`:
+
+```yaml
+# @package _global_
+env_vars:
+  NCCL_DEBUG: WARN
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  NCCL_TIMEOUT: "600"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+```
+
+Write to `src/madengine/configs/env/infiniband.yaml`:
+
+```yaml
+# @package _global_
+env_vars:
+  NCCL_IB_DISABLE: "0"
+  NCCL_IB_HCA: "mlx5_0:1,mlx5_1:1"
+  NCCL_SOCKET_IFNAME: ib0
+  NCCL_NET_GDR_LEVEL: "3"
+```
+
+Write to `src/madengine/configs/env/miopen_defaults.yaml`:
+
+```yaml
+# @package _global_
+env_vars:
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+```
+
+- [ ] **Step 3: Create tools configs**
+
+Write to `src/madengine/configs/tools/rocprofv3_lightweight.yaml`:
+
+```yaml
+# @package _global_
+tools:
+  - name: rocprofv3_lightweight
+```
+
+Write to `src/madengine/configs/tools/rocprofv3_comprehensive.yaml`:
+
+```yaml
+# @package _global_
+tools:
+  - name: rocprofv3_full
+    env_vars:
+      RCCL_DEBUG: INFO
+      HSA_ENABLE_SDMA: "0"
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+      POWER_DUAL_GCD: "false"
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+  - name: miopen_trace
+  - name: rocblas_trace
+```
+
+Write to `src/madengine/configs/tools/power_profiler.yaml`:
+
+```yaml
+# @package _global_
+tools:
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+      POWER_MODE: power
+      POWER_DUAL_GCD: "false"
+      POWER_OUTPUT_FILE: gpu_info_power_profiler_output.csv
+```
+
+Write to `src/madengine/configs/tools/vram_profiler.yaml`:
+
+```yaml
+# @package _global_
+tools:
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+      VRAM_MODE: vram
+      VRAM_DUAL_GCD: "false"
+      VRAM_OUTPUT_FILE: gpu_info_vram_profiler_output.csv
+```
+
+Write to `src/madengine/configs/tools/rocm_trace_lite.yaml`:
+
+```yaml
+# @package _global_
+tools:
+  - name: rocm_trace_lite
+    env_vars:
+      RTL_MODE: lite
+```
+
+- [ ] **Step 4: Create data configs**
+
+Write to `src/madengine/configs/data/local.yaml`:
+
+```yaml
+# @package _global_
+data:
+  provider: local
+  path: null
+```
+
+Write to `src/madengine/configs/data/s3.yaml`:
+
+```yaml
+# @package _global_
+data:
+  provider: s3
+  bucket: null
+  prefix: null
+  region: null
+```
+
+Write to `src/madengine/configs/data/minio.yaml`:
+
+```yaml
+# @package _global_
+data:
+  provider: minio
+  endpoint: null
+  bucket: null
+  access_key: null
+  secret_key: null
+```
+
+Write to `src/madengine/configs/data/nas.yaml`:
+
+```yaml
+# @package _global_
+data:
+  provider: nas
+  mount_path: null
+```
+
+- [ ] **Step 5: Create build configs**
+
+Write to `src/madengine/configs/build/default.yaml`:
+
+```yaml
+# @package _global_
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
+```
+
+Write to `src/madengine/configs/build/ci.yaml`:
+
+```yaml
+# @package _global_
+docker:
+  clean_cache: true
+
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
+```
+
+Write to `src/madengine/configs/build/multi_arch.yaml`:
+
+```yaml
+# @package _global_
+build:
+  registry: null
+  target_archs:
+    - gfx942
+    - gfx90a
+    - gfx908
+  manifest_output: build_manifest.json
+```
+
+- [ ] **Step 6: Verify append-only group composition**
+
+Run:
+
+```bash
+python -c "
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import OmegaConf
+import os
+GlobalHydra.instance().clear()
+config_dir = os.path.abspath('src/madengine/configs')
+with initialize_config_dir(config_dir=config_dir, version_base=None):
+    cfg = compose(config_name='config', overrides=['scheduler=slurm', 'launcher=torchrun', '+profile=mi300x_8gpu', '+env=nccl_debug'])
+print(OmegaConf.to_yaml(cfg))
+"
+```
+
+Expected: Prints composed config with SLURM scheduler, torchrun launcher, mi300x profile, and NCCL debug env vars all merged.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add src/madengine/configs/
+git commit -m "feat(config): add append-only config groups (profile, env, tools, data, build)"
+```
+
+---
+
+### Task 4: Implement HydraConfigLoader
+
+**Files:**
+- Create: `src/madengine/config/__init__.py`
+- Create: `src/madengine/config/loader.py`
+- Test: `tests/unit/test_hydra_config_loader.py`
+
+- [ ] **Step 1: Write failing tests for HydraConfigLoader**
+
+Write to `tests/unit/test_hydra_config_loader.py`:
+
+```python
+#!/usr/bin/env python3
+"""Tests for HydraConfigLoader."""
+
+import os
+import pytest
+import tempfile
+from pathlib import Path
+
+from omegaconf import DictConfig
+
+from madengine.config.loader import HydraConfigLoader
+from madengine.core.errors import ConfigurationError
+
+
+class TestParseArgs:
+    def test_hydra_overrides_only(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["scheduler=slurm", "distributed.nnodes=4"]
+        )
+        assert user_file is None
+        assert overrides == ["scheduler=slurm", "distributed.nnodes=4"]
+
+    def test_yaml_file_only(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["/path/to/config.yaml"]
+        )
+        assert user_file == "/path/to/config.yaml"
+        assert overrides == []
+
+    def test_yaml_file_with_overrides(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["/path/to/config.yaml", "distributed.nnodes=8"]
+        )
+        assert user_file == "/path/to/config.yaml"
+        assert overrides == ["distributed.nnodes=8"]
+
+    def test_yml_extension_recognized(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["/path/to/config.yml"]
+        )
+        assert user_file == "/path/to/config.yml"
+
+    def test_multiple_yaml_files_raises(self):
+        with pytest.raises(ConfigurationError, match="Only one YAML"):
+            HydraConfigLoader._parse_args(
+                ["/path/a.yaml", "/path/b.yaml"]
+            )
+
+    def test_append_override_not_treated_as_file(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["+profile=mi300x_8gpu"]
+        )
+        assert user_file is None
+        assert overrides == ["+profile=mi300x_8gpu"]
+
+    def test_empty_args(self):
+        user_file, overrides = HydraConfigLoader._parse_args([])
+        assert user_file is None
+        assert overrides == []
+
+
+class TestLoad:
+    def test_defaults_only(self):
+        cfg = HydraConfigLoader.load([])
+        assert isinstance(cfg, DictConfig)
+        assert cfg.gpu_vendor == "AMD"
+        assert cfg.guest_os == "UBUNTU"
+        assert cfg.distributed.enabled is False
+
+    def test_scheduler_override(self):
+        cfg = HydraConfigLoader.load(["scheduler=slurm"])
+        assert "slurm" in cfg
+        assert cfg.slurm.partition == "amd-rccl"
+
+    def test_launcher_override(self):
+        cfg = HydraConfigLoader.load(["launcher=torchrun"])
+        assert cfg.distributed.enabled is True
+        assert cfg.distributed.launcher == "torchrun"
+
+    def test_inline_value_override(self):
+        cfg = HydraConfigLoader.load(
+            ["launcher=torchrun", "distributed.nnodes=4"]
+        )
+        assert cfg.distributed.nnodes == 4
+
+    def test_append_profile(self):
+        cfg = HydraConfigLoader.load(["+profile=mi300x_8gpu"])
+        assert cfg.gpu_type == "mi300x"
+        assert cfg.distributed.nproc_per_node == 8
+
+    def test_user_yaml_file(self):
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".yaml", delete=False
+        ) as f:
+            f.write("debug: true\nenv_vars:\n  MY_VAR: hello\n")
+            f.flush()
+            try:
+                cfg = HydraConfigLoader.load([f.name])
+                assert cfg.debug is True
+                assert cfg.env_vars.MY_VAR == "hello"
+            finally:
+                os.unlink(f.name)
+
+    def test_user_yaml_with_overrides(self):
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".yaml", delete=False
+        ) as f:
+            f.write("debug: true\n")
+            f.flush()
+            try:
+                cfg = HydraConfigLoader.load(
+                    [f.name, "scheduler=slurm"]
+                )
+                assert cfg.debug is True
+                assert "slurm" in cfg
+            finally:
+                os.unlink(f.name)
+
+    def test_hardware_nvidia(self):
+        cfg = HydraConfigLoader.load(["hardware=nvidia"])
+        assert cfg.gpu_vendor == "NVIDIA"
+        assert cfg.runtime.use_gpu_flag is True
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `pytest tests/unit/test_hydra_config_loader.py -v`
+Expected: FAIL — `ModuleNotFoundError: No module named 'madengine.config'`
+
+- [ ] **Step 3: Implement HydraConfigLoader**
+
+Write to `src/madengine/config/__init__.py`:
+
+```python
+"""Config-driven YAML configuration system for madengine."""
+
+from madengine.config.loader import HydraConfigLoader
+from madengine.config.translator import ConfigTranslator
+from madengine.config.schema import ConfigValidator
+
+
+def load_config(config_args: list) -> tuple:
+    """Load config from Hydra overrides and/or user YAML file.
+
+    Args:
+        config_args: List of Hydra overrides and/or a YAML file path.
+
+    Returns:
+        Tuple of (additional_context dict, metadata dict).
+    """
+    cfg = HydraConfigLoader.load(config_args)
+    errors = ConfigValidator.validate(cfg)
+    if errors:
+        from madengine.core.errors import ConfigurationError
+
+        raise ConfigurationError(
+            "Config validation errors:\n" + "\n".join(f"  - {e}" for e in errors)
+        )
+    return ConfigTranslator.to_additional_context(cfg)
+```
+
+Write to `src/madengine/config/loader.py`:
+
+```python
+"""Hydra-based config loader using the Compose API."""
+
+import importlib.resources
+import os
+from pathlib import Path
+
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import DictConfig, OmegaConf
+
+from madengine.core.errors import ConfigurationError
+
+
+class HydraConfigLoader:
+    """Loads madengine config using Hydra's Compose API."""
+
+    @staticmethod
+    def load(config_args: list) -> DictConfig:
+        """Load and compose config from Hydra overrides and/or user YAML.
+
+        Args:
+            config_args: Mix of Hydra overrides and optional user YAML path.
+
+        Returns:
+            Composed DictConfig with all merges applied.
+        """
+        user_file, overrides = HydraConfigLoader._parse_args(config_args)
+
+        config_dir = str(
+            Path(importlib.resources.files("madengine")) / "configs"
+        )
+
+        if not os.path.isdir(config_dir):
+            config_dir = str(
+                Path(__file__).parent.parent / "configs"
+            )
+
+        GlobalHydra.instance().clear()
+
+        with initialize_config_dir(config_dir=config_dir, version_base=None):
+            cfg = compose(config_name="config", overrides=overrides)
+
+        if user_file:
+            user_cfg = OmegaConf.load(user_file)
+            OmegaConf.set_struct(cfg, False)
+            cfg = OmegaConf.merge(cfg, user_cfg)
+
+        return cfg
+
+    @staticmethod
+    def _parse_args(config_args: list) -> tuple:
+        """Separate user YAML file path from Hydra overrides."""
+        user_file = None
+        overrides = []
+        for arg in config_args:
+            if (
+                arg.endswith((".yaml", ".yml"))
+                and "=" not in arg
+                and not arg.startswith("+")
+            ):
+                if user_file:
+                    raise ConfigurationError(
+                        "Only one YAML config file allowed"
+                    )
+                user_file = arg
+            else:
+                overrides.append(arg)
+        return user_file, overrides
+```
+
+- [ ] **Step 4: Create stub translator and schema so imports resolve**
+
+Write to `src/madengine/config/translator.py`:
+
+```python
+"""Translates clean YAML config to internal additional_context format."""
+
+from omegaconf import DictConfig, OmegaConf
+
+
+class ConfigTranslator:
+    """Maps YAML config keys to internal additional_context dict format."""
+
+    KEY_MAP = {
+        "docker.build_args": "docker_build_arg",
+        "docker.env_vars": "docker_env_vars",
+        "docker.mounts": "docker_mounts",
+        "docker.gpus": "docker_gpus",
+        "docker.cpus": "docker_cpus",
+        "docker.additional_run_options": "additional_docker_run_options",
+        "log_error.pattern_scan": "log_error_pattern_scan",
+        "log_error.benign_patterns": "log_error_benign_patterns",
+        "log_error.patterns": "log_error_patterns",
+    }
+
+    EXTRACTED_KEYS = {
+        "model", "build", "platform", "output",
+        "summary_output", "data_config", "live_output",
+    }
+
+    @classmethod
+    def to_additional_context(cls, cfg: DictConfig) -> tuple:
+        """Placeholder — implemented in Task 5."""
+        return {}, {}
+```
+
+Write to `src/madengine/config/schema.py`:
+
+```python
+"""Config validation."""
+
+from omegaconf import DictConfig
+
+
+class ConfigValidator:
+    """Validates composed config for consistency."""
+
+    @staticmethod
+    def validate(cfg: DictConfig) -> list:
+        """Placeholder — implemented in Task 6."""
+        return []
+```
+
+- [ ] **Step 5: Run tests to verify they pass**
+
+Run: `pytest tests/unit/test_hydra_config_loader.py -v`
+Expected: All tests PASS.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add src/madengine/config/ tests/unit/test_hydra_config_loader.py
+git commit -m "feat(config): implement HydraConfigLoader with Compose API"
+```
+
+---
+
+### Task 5: Implement ConfigTranslator
+
+**Files:**
+- Modify: `src/madengine/config/translator.py`
+- Test: `tests/unit/test_config_translator.py`
+
+- [ ] **Step 1: Write failing tests for ConfigTranslator**
+
+Write to `tests/unit/test_config_translator.py`:
+
+```python
+#!/usr/bin/env python3
+"""Tests for ConfigTranslator."""
+
+import pytest
+from omegaconf import OmegaConf
+
+from madengine.config.translator import ConfigTranslator
+
+
+def make_cfg(overrides: dict) -> "DictConfig":
+    """Build a DictConfig from a base + overrides for testing."""
+    base = {
+        "model": {"tags": [], "manifest_file": None, "container_image": None, "skip_run": False, "timeout": None},
+        "docker": {"build_args": {}, "env_vars": {}, "mounts": {}, "gpus": None, "cpus": None, "additional_run_options": None, "keep_alive": False, "clean_cache": False},
+        "build": {"registry": None, "target_archs": [], "manifest_output": "build_manifest.json"},
+        "env_vars": {},
+        "debug": False,
+        "live_output": False,
+        "log_error": {"pattern_scan": True, "benign_patterns": [], "patterns": []},
+        "tools": [],
+        "pre_scripts": [],
+        "post_scripts": [],
+        "encapsulate_script": None,
+        "data_config": "data.json",
+        "output": "perf.csv",
+        "summary_output": None,
+        "gpu_vendor": "AMD",
+        "guest_os": "UBUNTU",
+        "runtime": {"devices": [], "capabilities": [], "security_opts": [], "network_mode": "host", "ipc": "host", "groups": [], "use_gpu_flag": False},
+        "platform": {"type": "docker"},
+    }
+    merged = {**base, **overrides}
+    return OmegaConf.create(merged)
+
+
+class TestDockerKeyMapping:
+    def test_build_args_mapped(self):
+        cfg = make_cfg({"docker": {"build_args": {"KEY": "val"}, "env_vars": {}, "mounts": {}, "gpus": None, "cpus": None, "additional_run_options": None, "keep_alive": False, "clean_cache": False}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["docker_build_arg"] == {"KEY": "val"}
+
+    def test_env_vars_mapped(self):
+        cfg = make_cfg({"docker": {"build_args": {}, "env_vars": {"A": "1"}, "mounts": {}, "gpus": None, "cpus": None, "additional_run_options": None, "keep_alive": False, "clean_cache": False}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["docker_env_vars"] == {"A": "1"}
+
+    def test_null_gpus_excluded(self):
+        cfg = make_cfg({})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert "docker_gpus" not in ctx
+
+    def test_non_null_gpus_included(self):
+        cfg = make_cfg({"docker": {"build_args": {}, "env_vars": {}, "mounts": {}, "gpus": "0-3", "cpus": None, "additional_run_options": None, "keep_alive": False, "clean_cache": False}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["docker_gpus"] == "0-3"
+
+
+class TestLogErrorMapping:
+    def test_pattern_scan_mapped(self):
+        cfg = make_cfg({"log_error": {"pattern_scan": False, "benign_patterns": [], "patterns": []}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["log_error_pattern_scan"] is False
+
+    def test_patterns_mapped(self):
+        cfg = make_cfg({"log_error": {"pattern_scan": True, "benign_patterns": ["OK"], "patterns": ["ERR"]}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["log_error_benign_patterns"] == ["OK"]
+        assert ctx["log_error_patterns"] == ["ERR"]
+
+
+class TestPassthroughKeys:
+    def test_gpu_vendor_passthrough(self):
+        cfg = make_cfg({"gpu_vendor": "NVIDIA"})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["gpu_vendor"] == "NVIDIA"
+
+    def test_env_vars_passthrough(self):
+        cfg = make_cfg({"env_vars": {"MY": "VAR"}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["env_vars"] == {"MY": "VAR"}
+
+    def test_slurm_passthrough(self):
+        cfg = make_cfg({"slurm": {"partition": "gpu"}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["slurm"] == {"partition": "gpu"}
+
+    def test_distributed_passthrough(self):
+        cfg = make_cfg({"distributed": {"enabled": True, "launcher": "torchrun"}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["distributed"]["launcher"] == "torchrun"
+
+    def test_tools_passthrough(self):
+        cfg = make_cfg({"tools": [{"name": "rpd"}]})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["tools"] == [{"name": "rpd"}]
+
+
+class TestExtractedKeys:
+    def test_model_extracted(self):
+        cfg = make_cfg({"model": {"tags": ["dummy"], "manifest_file": None, "container_image": None, "skip_run": False, "timeout": 300}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert "model" not in ctx
+        assert meta["model"]["tags"] == ["dummy"]
+        assert meta["model"]["timeout"] == 300
+
+    def test_build_extracted(self):
+        cfg = make_cfg({"build": {"registry": "myregistry.io", "target_archs": ["gfx942"], "manifest_output": "build_manifest.json"}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert "build" not in ctx
+        assert meta["build"]["registry"] == "myregistry.io"
+
+    def test_platform_extracted(self):
+        cfg = make_cfg({})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert "platform" not in ctx
+        assert meta["platform"]["type"] == "docker"
+
+    def test_container_image_promoted(self):
+        cfg = make_cfg({"model": {"tags": [], "manifest_file": None, "container_image": "myimage:latest", "skip_run": False, "timeout": None}})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert ctx["MAD_CONTAINER_IMAGE"] == "myimage:latest"
+
+    def test_runtime_extracted(self):
+        cfg = make_cfg({})
+        ctx, meta = ConfigTranslator.to_additional_context(cfg)
+        assert "runtime" not in ctx
+        assert "runtime" in meta
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `pytest tests/unit/test_config_translator.py -v`
+Expected: FAIL — translator returns empty dicts.
+
+- [ ] **Step 3: Implement ConfigTranslator**
+
+Replace the content of `src/madengine/config/translator.py` with:
+
+```python
+"""Translates clean YAML config to internal additional_context format."""
+
+from omegaconf import DictConfig, OmegaConf
+
+
+class ConfigTranslator:
+    """Maps YAML config keys to internal additional_context dict format."""
+
+    KEY_MAP = {
+        "docker.build_args": "docker_build_arg",
+        "docker.env_vars": "docker_env_vars",
+        "docker.mounts": "docker_mounts",
+        "docker.gpus": "docker_gpus",
+        "docker.cpus": "docker_cpus",
+        "docker.additional_run_options": "additional_docker_run_options",
+        "log_error.pattern_scan": "log_error_pattern_scan",
+        "log_error.benign_patterns": "log_error_benign_patterns",
+        "log_error.patterns": "log_error_patterns",
+    }
+
+    EXTRACTED_KEYS = {
+        "model", "build", "platform", "output",
+        "summary_output", "data_config", "live_output",
+    }
+
+    @classmethod
+    def to_additional_context(cls, cfg: DictConfig) -> tuple:
+        """Convert DictConfig to (additional_context, metadata) tuple.
+
+        Returns:
+            additional_context: dict in the format expected by existing pipeline.
+            metadata: dict with model.tags, build.registry, etc. for the CLI layer.
+        """
+        raw = OmegaConf.to_container(cfg, resolve=True)
+
+        context = {}
+        metadata = {}
+
+        for key, value in raw.items():
+            if key in cls.EXTRACTED_KEYS:
+                metadata[key] = value
+            elif key == "docker":
+                for subkey, subval in value.items():
+                    internal_key = cls.KEY_MAP.get(
+                        f"docker.{subkey}", f"docker_{subkey}"
+                    )
+                    if subval is not None:
+                        context[internal_key] = subval
+            elif key == "log_error":
+                for subkey, subval in value.items():
+                    internal_key = cls.KEY_MAP.get(
+                        f"log_error.{subkey}", f"log_error_{subkey}"
+                    )
+                    context[internal_key] = subval
+            elif key == "runtime":
+                metadata["runtime"] = value
+            else:
+                if value is not None:
+                    context[key] = value
+
+        model = metadata.get("model", {})
+        if model and model.get("container_image"):
+            context["MAD_CONTAINER_IMAGE"] = model["container_image"]
+
+        return context, metadata
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `pytest tests/unit/test_config_translator.py -v`
+Expected: All tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/madengine/config/translator.py tests/unit/test_config_translator.py
+git commit -m "feat(config): implement ConfigTranslator key mapping"
+```
+
+---
+
+### Task 6: Implement ConfigValidator
+
+**Files:**
+- Modify: `src/madengine/config/schema.py`
+- Test: `tests/unit/test_config_schema.py`
+
+- [ ] **Step 1: Write failing tests for ConfigValidator**
+
+Write to `tests/unit/test_config_schema.py`:
+
+```python
+#!/usr/bin/env python3
+"""Tests for ConfigValidator."""
+
+import pytest
+from omegaconf import OmegaConf
+
+from madengine.config.schema import ConfigValidator
+
+
+def make_cfg(data: dict) -> "DictConfig":
+    return OmegaConf.create(data)
+
+
+class TestConflictDetection:
+    def test_slurm_and_k8s_conflict(self):
+        cfg = make_cfg({"slurm": {"partition": "gpu"}, "k8s": {"namespace": "default"}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("Cannot specify both" in e for e in errors)
+
+    def test_slurm_only_no_conflict(self):
+        cfg = make_cfg({"slurm": {"partition": "gpu"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Cannot specify both" in e for e in errors)
+
+    def test_k8s_only_no_conflict(self):
+        cfg = make_cfg({"k8s": {"namespace": "default"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Cannot specify both" in e for e in errors)
+
+
+class TestDistributedValidation:
+    def test_enabled_without_launcher(self):
+        cfg = make_cfg({"distributed": {"enabled": True}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("requires distributed.launcher" in e for e in errors)
+
+    def test_enabled_with_launcher(self):
+        cfg = make_cfg({"distributed": {"enabled": True, "launcher": "torchrun"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("requires distributed.launcher" in e for e in errors)
+
+    def test_invalid_nnodes(self):
+        cfg = make_cfg({"distributed": {"enabled": True, "launcher": "torchrun", "nnodes": -1}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("positive integer" in e for e in errors)
+
+    def test_valid_nnodes(self):
+        cfg = make_cfg({"distributed": {"enabled": True, "launcher": "torchrun", "nnodes": 4}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("positive integer" in e for e in errors)
+
+
+class TestUnknownKeys:
+    def test_unknown_top_level_key(self):
+        cfg = make_cfg({"gpu_vendor": "AMD", "typo_key": "oops"})
+        errors = ConfigValidator.validate(cfg)
+        assert any("Unknown config key: 'typo_key'" in e for e in errors)
+
+    def test_known_keys_accepted(self):
+        cfg = make_cfg({"gpu_vendor": "AMD", "debug": True, "env_vars": {}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Unknown config key" in e for e in errors)
+
+
+class TestPlatformValidation:
+    def test_unsupported_platform(self):
+        cfg = make_cfg({"platform": {"type": "bare_metal"}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("not yet supported" in e for e in errors)
+
+    def test_docker_platform_ok(self):
+        cfg = make_cfg({"platform": {"type": "docker"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("not yet supported" in e for e in errors)
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `pytest tests/unit/test_config_schema.py -v`
+Expected: FAIL — validator returns empty list.
+
+- [ ] **Step 3: Implement ConfigValidator**
+
+Replace the content of `src/madengine/config/schema.py` with:
+
+```python
+"""Config validation for composed Hydra configs."""
+
+from omegaconf import DictConfig, OmegaConf
+
+
+KNOWN_TOP_LEVEL_KEYS = {
+    "defaults", "platform", "scheduler", "hardware", "launcher",
+    "model", "docker", "build", "env_vars", "debug", "live_output",
+    "log_error", "tools", "pre_scripts", "post_scripts",
+    "encapsulate_script", "data_config", "output", "summary_output",
+    "gpu_vendor", "guest_os", "runtime", "slurm", "k8s",
+    "kubernetes", "distributed", "vllm", "sglang_disagg",
+    "shared_data", "timeout", "gpu_type", "gpu_memory_gb",
+    "gpus_per_node", "data",
+}
+
+SUPPORTED_PLATFORMS = {"docker"}
+
+
+class ConfigValidator:
+    """Validates composed config for consistency."""
+
+    @staticmethod
+    def validate(cfg: DictConfig) -> list:
+        """Return list of validation errors (empty = valid)."""
+        errors = []
+
+        # Convert to plain containers first: dict(cfg) leaves nested sections
+        # as DictConfig nodes, which are not dict instances, so the
+        # isinstance(..., dict) checks below would silently be skipped.
+        if OmegaConf.is_config(cfg):
+            raw = OmegaConf.to_container(cfg, resolve=True)
+        else:
+            raw = dict(cfg) if hasattr(cfg, "keys") else {}
+
+        if raw.get("slurm") and raw.get("k8s"):
+            errors.append(
+                "Cannot specify both 'slurm' and 'k8s' sections"
+            )
+
+        dist = raw.get("distributed")
+        if isinstance(dist, dict):
+            if dist.get("enabled") and not dist.get("launcher"):
+                errors.append(
+                    "distributed.enabled=true requires distributed.launcher"
+                )
+            nnodes = dist.get("nnodes")
+            if nnodes is not None:
+                if not isinstance(nnodes, int) or nnodes < 1:
+                    errors.append(
+                        "distributed.nnodes must be a positive integer"
+                    )
+
+        platform = raw.get("platform")
+        if isinstance(platform, dict):
+            ptype = platform.get("type")
+            if ptype and ptype not in SUPPORTED_PLATFORMS:
+                errors.append(
+                    f"Platform '{ptype}' is not yet supported. "
+                    f"Supported: {', '.join(sorted(SUPPORTED_PLATFORMS))}"
+                )
+
+        for key in raw:
+            if key not in KNOWN_TOP_LEVEL_KEYS:
+                errors.append(f"Unknown config key: '{key}'")
+
+        return errors
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `pytest tests/unit/test_config_schema.py -v`
+Expected: All tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/madengine/config/schema.py tests/unit/test_config_schema.py
+git commit -m "feat(config): implement ConfigValidator with cross-field checks"
+```
+
+---
+
+### Task 7: Integration Test — load_config End-to-End
+
+**Files:**
+- Create: `tests/unit/test_config_integration.py`
+- Create: `tests/fixtures/configs/test_slurm_job.yaml`
+
+- [ ] **Step 1: Create test fixture YAML**
+
+Write to `tests/fixtures/configs/test_slurm_job.yaml`:
+
+```yaml
+model:
+  tags: [dummy]
+
+slurm:
+  partition: test-partition
+  nodes: 2
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 4
+
+env_vars:
+  MY_VAR: test_value
+
+debug: true
+```
+
+- [ ] **Step 2: Write integration tests**
+
+Write to `tests/unit/test_config_integration.py`:
+
+```python
+#!/usr/bin/env python3
+"""Integration tests for load_config end-to-end pipeline."""
+
+import os
+import pytest
+from pathlib import Path
+
+from madengine.config import load_config
+from madengine.core.errors import ConfigurationError
+
+
+FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" / "configs"
+
+
+class TestLoadConfigEndToEnd:
+    def test_defaults_produce_valid_context(self):
+        ctx, meta = load_config([])
+        assert ctx["gpu_vendor"] == "AMD"
+        assert ctx["guest_os"] == "UBUNTU"
+        assert meta["model"]["tags"] == []
+
+    def test_scheduler_slurm(self):
+        ctx, meta = load_config(["scheduler=slurm"])
+        assert "slurm" in ctx
+        assert ctx["slurm"]["partition"] == "amd-rccl"
+
+    def test_launcher_torchrun(self):
+        ctx, meta = load_config(["launcher=torchrun"])
+        assert ctx["distributed"]["enabled"] is True
+        assert ctx["distributed"]["launcher"] == "torchrun"
+
+    def test_combined_overrides(self):
+        ctx, meta = load_config([
+            "scheduler=slurm",
+            "launcher=torchrun",
+            "distributed.nnodes=4",
+            "+env=nccl_debug",
+        ])
+        assert ctx["distributed"]["nnodes"] == 4
+        assert ctx["env_vars"]["NCCL_DEBUG"] == "INFO"
+        assert "slurm" in ctx
+
+    def test_user_yaml_file(self):
+        yaml_path = str(FIXTURES_DIR / "test_slurm_job.yaml")
+        ctx, meta = load_config([yaml_path])
+        assert meta["model"]["tags"] == ["dummy"]
+        assert ctx["slurm"]["partition"] == "test-partition"
+        assert ctx["distributed"]["nnodes"] == 2
+        assert ctx["env_vars"]["MY_VAR"] == "test_value"
+        assert ctx["debug"] is True
+
+    def test_user_yaml_with_override(self):
+        yaml_path = str(FIXTURES_DIR / "test_slurm_job.yaml")
+        ctx, meta = load_config([yaml_path, "distributed.nnodes=8"])
+        assert ctx["distributed"]["nnodes"] == 8
+
+    def test_docker_keys_translated(self):
+        ctx, meta = load_config(["docker.build_args.KEY=val"])
+        assert ctx["docker_build_arg"]["KEY"] == "val"
+
+    def test_slurm_and_k8s_conflict_raises(self):
+        with pytest.raises(ConfigurationError, match="Cannot specify both"):
+            load_config(["scheduler=slurm", "k8s.namespace=test"])
+
+    def test_unsupported_platform_raises(self):
+        with pytest.raises(ConfigurationError, match="not yet supported"):
+            load_config(["platform=bare_metal"])
+
+    def test_container_image_promoted(self):
+        ctx, meta = load_config(
+            ["model.container_image=myimage:latest"]
+        )
+        assert ctx["MAD_CONTAINER_IMAGE"] == "myimage:latest"
+
+    def test_model_tags_in_metadata(self):
+        ctx, meta = load_config(["model.tags=[dummy,bert]"])
+        assert meta["model"]["tags"] == ["dummy", "bert"]
+        assert "model" not in ctx
+
+    def test_profile_append(self):
+        ctx, meta = load_config(["+profile=mi300x_8gpu"])
+        assert ctx["gpu_type"] == "mi300x"
+        assert ctx["env_vars"]["HSA_ENABLE_SDMA"] == "0"
+
+    def test_tools_append(self):
+        ctx, meta = load_config(["+tools=rocprofv3_lightweight"])
+        assert len(ctx["tools"]) == 1
+        assert ctx["tools"][0]["name"] == "rocprofv3_lightweight"
+```
+
+- [ ] **Step 3: Run integration tests**
+
+Run: `pytest tests/unit/test_config_integration.py -v`
+Expected: All tests PASS.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add tests/unit/test_config_integration.py tests/fixtures/configs/
+git commit -m "test(config): add integration tests for load_config pipeline"
+```
+
+---
+
+### Task 8: Integrate --config into CLI run Command
+
+**Files:**
+- Modify: `src/madengine/cli/commands/run.py`
+
+- [ ] **Step 1: Add --config parameter and merge logic to run command**
+
+In `src/madengine/cli/commands/run.py`, add the import at the top (after the existing imports, around line 9):
+
+```python
+import ast
+```
+
+Add the `--config` parameter to the `run` function signature, after the `additional_context_file` parameter (after line 83):
+
+```python
+    config: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--config",
+            help="YAML config file and/or Hydra overrides; repeat the flag for each value (e.g., --config my_job.yaml --config scheduler=slurm --config launcher=torchrun)",
+        ),
+    ] = None,
+```
+
+After line 165 (`processed_tags = split_comma_separated_tags(tags)`), insert the config loading block:
+
+```python
+    # Load --config YAML if provided
+    if config:
+        from madengine.config import load_config
+
+        config_ctx, config_meta = load_config(config)
+
+        # Config values provide defaults; explicit CLI args override
+        if not processed_tags and config_meta.get("model", {}).get("tags"):
+            processed_tags = config_meta["model"]["tags"]
+        if timeout == DEFAULT_TIMEOUT and config_meta.get("model", {}).get("timeout"):
+            timeout = config_meta["model"]["timeout"]
+        if not manifest_file and config_meta.get("model", {}).get("manifest_file"):
+            manifest_file = config_meta["model"]["manifest_file"]
+        if not registry and config_meta.get("build", {}).get("registry"):
+            registry = config_meta["build"]["registry"]
+
+        # Merge: config is base, --additional-context overrides
+        parsed_ac = {}
+        if additional_context and additional_context.strip() != "{}":
+            try:
+                parsed_ac = json.loads(additional_context)
+            except json.JSONDecodeError:
+                parsed_ac = ast.literal_eval(additional_context)
+
+        def _deep_merge(base: dict, override: dict) -> dict:
+            result = base.copy()
+            for k, v in override.items():
+                if k in result and isinstance(result[k], dict) and isinstance(v, dict):
+                    result[k] = _deep_merge(result[k], v)
+                else:
+                    result[k] = v
+            return result
+
+        merged = _deep_merge(config_ctx, parsed_ac)
+        additional_context = repr(merged)
+        additional_context_file = None
+```
+
+- [ ] **Step 2: Verify the existing test suite still passes**
+
+Run: `pytest tests/unit/test_cli.py -v`
+Expected: All existing tests PASS (backward compatibility preserved).
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add src/madengine/cli/commands/run.py
+git commit -m "feat(config): integrate --config into run command"
+```
+
+---
+
+### Task 9: Integrate --config into CLI build Command
+
+**Files:**
+- Modify: `src/madengine/cli/commands/build.py`
+
+- [ ] **Step 1: Add --config parameter and merge logic to build command**
+
+In `src/madengine/cli/commands/build.py`, add the import at the top (after existing imports, around line 9):
+
+```python
+import ast
+```
+
+Add the `--config` parameter to the `build` function signature, after the `additional_context_file` parameter (after line 71):
+
+```python
+    config: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--config",
+            help="YAML config file and/or Hydra overrides; repeat the flag for each value (e.g., --config my_job.yaml --config scheduler=slurm)",
+        ),
+    ] = None,
+```
+
+After line 104 (`processed_tags = split_comma_separated_tags(tags)`), insert the config loading block:
+
+```python
+    # Load --config YAML if provided
+    if config:
+        from madengine.config import load_config
+
+        config_ctx, config_meta = load_config(config)
+
+        # Config values provide defaults; explicit CLI args override
+        if not processed_tags and config_meta.get("model", {}).get("tags"):
+            processed_tags = config_meta["model"]["tags"]
+        if not registry and config_meta.get("build", {}).get("registry"):
+            registry = config_meta["build"]["registry"]
+        build_meta = config_meta.get("build", {})
+        if not target_archs and build_meta.get("target_archs"):
+            target_archs = build_meta["target_archs"]
+
+        # Merge: config is base, --additional-context overrides
+        parsed_ac = {}
+        if additional_context and additional_context.strip() != "{}":
+            try:
+                parsed_ac = json.loads(additional_context)
+            except json.JSONDecodeError:
+                parsed_ac = ast.literal_eval(additional_context)
+
+        def _deep_merge(base: dict, override: dict) -> dict:
+            result = base.copy()
+            for k, v in override.items():
+                if k in result and isinstance(result[k], dict) and isinstance(v, dict):
+                    result[k] = _deep_merge(result[k], v)
+                else:
+                    result[k] = v
+            return result
+
+        merged = _deep_merge(config_ctx, parsed_ac)
+        additional_context = repr(merged)
+        additional_context_file = None
+```
+
+- [ ] **Step 2: Verify the existing test suite still passes**
+
+Run: `pytest tests/unit/test_cli.py -v`
+Expected: All existing tests PASS.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add src/madengine/cli/commands/build.py
+git commit -m "feat(config): integrate --config into build command"
+```
+
+---
+
+### Task 10: Extract _deep_merge to Shared Utility
+
+The `_deep_merge` function is duplicated in both `run.py` and `build.py`. Extract it.
+
+**Files:**
+- Modify: `src/madengine/cli/utils.py`
+- Modify: `src/madengine/cli/commands/run.py`
+- Modify: `src/madengine/cli/commands/build.py`
+
+- [ ] **Step 1: Add deep_merge to cli/utils.py**
+
+At the bottom of `src/madengine/cli/utils.py`, add:
+
+```python
+def deep_merge(base: dict, override: dict) -> dict:
+    """Recursively merge override into base. Override wins on conflicts."""
+    result = base.copy()
+    for k, v in override.items():
+        if k in result and isinstance(result[k], dict) and isinstance(v, dict):
+            result[k] = deep_merge(result[k], v)
+        else:
+            result[k] = v
+    return result
+```
+
+- [ ] **Step 2: Update run.py to use shared deep_merge**
+
+In `src/madengine/cli/commands/run.py`, add `deep_merge` to the import from `..utils`:
+
+```python
+from ..utils import (
+    console,
+    setup_logging,
+    split_comma_separated_tags,
+    create_args_namespace,
+    save_summary_with_feedback,
+    display_results_table,
+    display_performance_table,
+    deep_merge,
+)
+```
+
+Remove the inline `_deep_merge` function definition and replace `_deep_merge(` with `deep_merge(` in the config loading block.
+
+- [ ] **Step 3: Update build.py to use shared deep_merge**
+
+In `src/madengine/cli/commands/build.py`, add `deep_merge` to the import from `..utils`:
+
+```python
+from ..utils import (
+    console,
+    setup_logging,
+    split_comma_separated_tags,
+    create_args_namespace,
+    save_summary_with_feedback,
+    display_results_table,
+    deep_merge,
+)
+```
+
+Remove the inline `_deep_merge` function definition and replace `_deep_merge(` with `deep_merge(` in the config loading block.
+
+- [ ] **Step 4: Run all tests**
+
+Run: `pytest tests/unit/ -v --timeout=60`
+Expected: All tests PASS.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/madengine/cli/utils.py src/madengine/cli/commands/run.py src/madengine/cli/commands/build.py
+git commit -m "refactor(config): extract deep_merge to shared utility"
+```
+
+---
+
+### Task 11: Final Verification — Full Test Suite
+
+**Files:**
+- No new files — verification only.
+
+- [ ] **Step 1: Run the complete unit test suite**
+
+Run: `pytest tests/unit/ -v --timeout=60`
+Expected: All tests PASS including the new config tests.
+
+- [ ] **Step 2: Run the pre-commit hooks**
+
+Run: `pre-commit run --all-files`
+Expected: All hooks pass (black, isort, flake8).
+
+- [ ] **Step 3: Verify Hydra config composition end-to-end**
+
+Run:
+
+```bash
+python -c "
+from madengine.config import load_config
+ctx, meta = load_config(['scheduler=slurm', 'launcher=torchrun', '+profile=mi300x_8gpu', '+env=nccl_debug', 'model.tags=[dummy]'])
+print('Tags:', meta['model']['tags'])
+print('Launcher:', ctx['distributed']['launcher'])
+print('Partition:', ctx['slurm']['partition'])
+print('NCCL_DEBUG:', ctx['env_vars'].get('NCCL_DEBUG'))
+print('GPU type:', ctx.get('gpu_type'))
+"
+```
+
+Expected output:
+```
+Tags: ['dummy']
+Launcher: torchrun
+Partition: amd-rccl
+NCCL_DEBUG: INFO
+GPU type: mi300x
+```
+
+- [ ] **Step 4: Verify CLI help text includes --config**
+
+Run: `madengine run --help | grep -A2 "config"`
+Expected: Shows `--config` option with help text about YAML config files and Hydra overrides.
+
+- [ ] **Step 5: Final commit if any formatting fixes were needed**
+
+```bash
+git add -u
+git commit -m "style: apply formatting fixes from pre-commit hooks"
+```
diff --git a/docs/superpowers/specs/2026-05-02-config-driven-yaml-design.md b/docs/superpowers/specs/2026-05-02-config-driven-yaml-design.md
new file mode 100644
index 00000000..8c57d67e
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-02-config-driven-yaml-design.md
@@ -0,0 +1,973 @@
+# Config-Driven YAML System for madengine
+
+**Date:** 2026-05-02
+**Status:** Draft
+**Author:** Stephen Shao + Claude
+
+## Overview
+
+Add a `--config` CLI argument to madengine that accepts Hydra-based YAML configuration files with full CLI override support. This provides a structured, composable alternative to the error-prone `--additional-context` JSON string approach: YAML configs that can drive the entire workflow from a single file — model selection, deployment target, distributed training, profiling tools, and environment tuning. `--additional-context` remains supported and takes precedence over `--config` (see Goals).
+
+## Goals
+
+1. Single `--config` argument drives the full madengine workflow (build + run)
+2. Hydra config groups for composable deployment configurations
+3. CLI override support via dot-path syntax (`distributed.nnodes=4`)
+4. Clean, readable YAML keys with a translator to internal format
+5. Backward compatible: `--additional-context` still works and overrides `--config`
+6. Extensible for future platforms (bare metal, Singularity, Podman)
+
+## Non-Goals
+
+- Replacing `models.json` or `data.json` with YAML (they remain as-is)
+- Adding Hydra's `@hydra.main` decorator (Typer remains the CLI framework)
+- Recipe configs (can be added later as a config group)
+
+---
+
+## Config Directory Structure
+
+```
+src/madengine/configs/
+├── config.yaml                         # Root defaults + top-level settings
+│
+├── platform/                           # WHERE: execution platform
+│   ├── docker.yaml                     #   Docker container (default)
+│   ├── bare_metal.yaml                 #   Direct execution, no container (future)
+│   ├── singularity.yaml                #   Singularity/Apptainer (future)
+│   └── podman.yaml                     #   Podman container (future)
+│
+├── scheduler/                          # HOW: job scheduling
+│   ├── local.yaml                      #   Direct execution on current host (default)
+│   ├── slurm.yaml                      #   SLURM HPC cluster
+│   └── k8s.yaml                        #   Kubernetes cluster
+│
+├── hardware/                           # WHAT GPU: vendor + runtime settings
+│   ├── amd.yaml                        #   AMD ROCm (default) — vendor, guest_os,
+│   │                                   #   device mounts, security opts, renderD
+│   ├── nvidia.yaml                     #   NVIDIA CUDA — vendor, --gpus flag
+│   └── cpu.yaml                        #   CPU-only — no GPU devices
+│
+├── launcher/                           # WHAT FRAMEWORK: distributed launcher
+│   ├── none.yaml                       #   No distributed launcher (default)
+│   ├── torchrun.yaml                   #   PyTorch torchrun
+│   ├── deepspeed.yaml                  #   DeepSpeed
+│   ├── megatron.yaml                   #   Megatron-LM
+│   ├── vllm.yaml                       #   vLLM inference serving
+│   ├── sglang.yaml                     #   SGLang inference serving
+│   ├── sglang_disagg.yaml              #   SGLang disaggregated prefill/decode
+│   ├── torchtitan.yaml                 #   TorchTitan
+│   ├── primus.yaml                     #   Primus launcher
+│   └── native.yaml                     #   Native distributed (manual setup)
+│
+├── profile/                            # OPTIONAL: hardware profiles (+profile=)
+│   ├── mi300x_8gpu.yaml
+│   ├── mi300x_single.yaml
+│   ├── mi250x_4gpu.yaml
+│   ├── h100_8gpu.yaml
+│   └── a100_8gpu.yaml
+│
+├── env/                                # OPTIONAL: env var bundles (+env=)
+│   ├── nccl_debug.yaml
+│   ├── nccl_tuned.yaml
+│   ├── infiniband.yaml
+│   └── miopen_defaults.yaml
+│
+├── tools/                              # OPTIONAL: profiling tools (+tools=)
+│   ├── rocprofv3_lightweight.yaml
+│   ├── rocprofv3_comprehensive.yaml
+│   ├── power_profiler.yaml
+│   ├── vram_profiler.yaml
+│   └── rocm_trace_lite.yaml
+│
+├── data/                               # OPTIONAL: data provider (+data=)
+│   ├── local.yaml                      #   Local filesystem data
+│   ├── s3.yaml                         #   AWS S3 data source
+│   ├── minio.yaml                      #   MinIO object storage
+│   └── nas.yaml                        #   NAS/NFS shared storage
+│
+└── build/                              # OPTIONAL: build settings (+build=)
+    ├── default.yaml                    #   Default build settings
+    ├── ci.yaml                         #   CI pipeline (no cache, strict)
+    └── multi_arch.yaml                 #   Multi-architecture builds
+```
+
+**Note:** The `platform/` config group stubs (`bare_metal`, `singularity`, `podman`) are created with placeholder content for future extensibility. In Phase 1, only `docker` is functional — selecting any other platform fails validation with a `ConfigurationError` whose message contains `Platform '{name}' is not yet supported`.
+
+### Config Group Types
+
+| Group | Type | Hydra Syntax | Purpose |
+|-------|------|-------------|---------|
+| `platform` | Default | `platform=docker` | Execution platform |
+| `scheduler` | Default | `scheduler=slurm` | Job scheduler |
+| `hardware` | Default | `hardware=amd` | GPU vendor + runtime |
+| `launcher` | Default | `launcher=torchrun` | Distributed launcher |
+| `profile` | Append-only | `+profile=mi300x_8gpu` | Hardware presets |
+| `env` | Append-only | `+env=nccl_tuned` | Env var bundles |
+| `tools` | Append-only | `+tools=rocprofv3_lightweight` | Profiling tools |
+| `data` | Append-only | `+data=local` | Data provider |
+| `build` | Append-only | `+build=ci` | Build settings |
+
+- **Default groups:** exactly one option is selected; changing it replaces the previous selection.
+- **Append-only groups:** added on top of the existing config via the `+` prefix; composable.
+
+---
+
+## YAML Schema
+
+### Root Config (`config.yaml`)
+
+```yaml
+defaults:
+  - platform: docker
+  - scheduler: local
+  - hardware: amd
+  - launcher: none
+  - _self_
+
+# Model selection
+model:
+  tags: []                         # Model tags to build+run (equivalent to --tags)
+  manifest_file: null              # Use existing manifest (equivalent to --manifest-file)
+  container_image: null            # Skip build, use image (equivalent to MAD_CONTAINER_IMAGE)
+  skip_run: false                  # Build only (equivalent to --skip-model-run)
+  timeout: null                    # Run timeout in seconds
+
+# Docker / container settings
+docker:
+  build_args: {}                   # --build-arg flags
+  env_vars: {}                     # --env flags for docker run
+  mounts: {}                       # -v host:container volume mounts
+  gpus: null                       # GPU device range (auto-detected if null)
+  cpus: null                       # CPU affinity (--cpuset-cpus)
+  additional_run_options: null     # Extra docker run flags
+  keep_alive: false                # Keep containers after run
+  clean_cache: false               # Rebuild without cache
+
+# Build settings
+build:
+  registry: null                   # Docker registry URL
+  target_archs: []                 # Target GPU architectures for multi-arch
+  manifest_output: build_manifest.json
+
+# Environment variables (passed to container/job — separate from docker.env_vars)
+env_vars: {}
+
+# Runtime behavior
+debug: false
+live_output: false
+
+# Error scanning
+log_error:
+  pattern_scan: true
+  benign_patterns: []
+  patterns: []
+
+# Scripts
+tools: []
+pre_scripts: []
+post_scripts: []
+encapsulate_script: null
+
+# Data
+data_config: data.json
+
+# Output
+output: perf.csv
+summary_output: null
+```
+
+### Scheduler Configs
+
+**`scheduler/local.yaml`:**
+```yaml
+# @package _global_
+# Local execution — no scheduler-specific config needed
+```
+
+**`scheduler/slurm.yaml`:**
+```yaml
+# @package _global_
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 8
+  time: "24:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  modules: []
+  account: null
+  qos: null
+  constraint: null
+  nodelist: null
+  exclude: null
+  results_dir: null
+  shared_workspace: null
+  network_interface: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  MIOPEN_FIND_MODE: "1"
+```
+
+**`scheduler/k8s.yaml`:**
+```yaml
+# @package _global_
+k8s:
+  kubeconfig: ~/.kube/config
+  namespace: default
+  image_pull_policy: Always
+  backoff_limit: 3
+  ttl_seconds_after_finished: null
+  allow_privileged_profiling: null
+  gpu_count: null
+  gpu_resource_name: amd.com/gpu
+  memory: null
+  memory_limit: null
+  cpu: null
+  cpu_limit: null
+  host_ipc: true
+  node_selector: {}
+  tolerations: []
+  nfs_storage_class: nfs-banff
+  local_path_storage_class: local-path
+  data_storage_class: nfs-banff
+  recreate_shared_data_pvc: false
+  results_pvc: null
+  data_pvc: null
+  output_dir: null
+  secrets:
+    strategy: from_local_credentials
+    image_pull_secret_names: []
+    runtime_secret_name: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+```
+
+### Hardware Configs
+
+**`hardware/amd.yaml`:**
+```yaml
+# @package _global_
+gpu_vendor: AMD
+guest_os: UBUNTU
+
+runtime:
+  devices:
+    - /dev/kfd
+    - /dev/dri
+    - /dev/infiniband
+  capabilities:
+    - SYS_PTRACE
+  security_opts:
+    - seccomp=unconfined
+  network_mode: host
+  ipc: host
+  groups:
+    - video
+  use_gpu_flag: false
+```
+
+**`hardware/nvidia.yaml`:**
+```yaml
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+```
+
+**`hardware/cpu.yaml`:**
+```yaml
+# @package _global_
+gpu_vendor: null
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: null
+  ipc: null
+  groups: []
+  use_gpu_flag: false
+```
+
+### Launcher Configs
+
+**`launcher/none.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: false
+```
+
+**`launcher/torchrun.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+  port: 29500
+```
+
+**`launcher/vllm.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 1
+  nproc_per_node: 4
+
+vllm:
+  kv_cache_size: 0.7
+  max_model_len: null
+  tensor_parallel_size: null
+```
+
+**`launcher/sglang_disagg.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: sglang-disagg
+  backend: nccl
+  nnodes: 3
+  nproc_per_node: 8
+  port: 29500
+
+sglang_disagg:
+  prefill_nodes: null
+  decode_nodes: null
+  transfer_backend: mooncake
+```
+
+**`launcher/deepspeed.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: deepspeed
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+**`launcher/megatron.yaml`:**
+```yaml
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+```
+
+### Profile Configs (append-only)
+
+**`profile/mi300x_8gpu.yaml`:**
+```yaml
+# @package _global_
+# Use: +profile=mi300x_8gpu
+# Note: profile keys use gpu_* prefix to avoid collision with hardware/ config group
+gpu_type: mi300x
+gpu_memory_gb: 192
+gpus_per_node: 8
+
+distributed:
+  nproc_per_node: 8
+
+env_vars:
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+```
+
+### Env Configs (append-only)
+
+**`env/infiniband.yaml`:**
+```yaml
+# @package _global_
+# Use: +env=infiniband
+env_vars:
+  NCCL_IB_DISABLE: "0"
+  NCCL_IB_HCA: "mlx5_0:1,mlx5_1:1"
+  NCCL_SOCKET_IFNAME: ib0
+  NCCL_NET_GDR_LEVEL: "3"
+```
+
+**`env/nccl_debug.yaml`:**
+```yaml
+# @package _global_
+# Use: +env=nccl_debug
+env_vars:
+  NCCL_DEBUG: INFO
+  NCCL_DEBUG_SUBSYS: "INIT,NET,GRAPH"
+  TORCH_DISTRIBUTED_DEBUG: DETAIL
+```
+
+### Tools Configs (append-only)
+
+**`tools/rocprofv3_comprehensive.yaml`:**
+```yaml
+# @package _global_
+# Use: +tools=rocprofv3_comprehensive
+tools:
+  - name: rocprofv3_full
+    env_vars:
+      RCCL_DEBUG: INFO
+      HSA_ENABLE_SDMA: "0"
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+      POWER_DUAL_GCD: "false"
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+  - name: miopen_trace
+  - name: rocblas_trace
+```
+
+---
+
+## Internal Architecture
+
+### New Module: `src/madengine/config/`
+
+```
+src/madengine/config/
+├── __init__.py              # Public API: load_config()
+├── loader.py                # HydraConfigLoader: Compose API integration
+├── translator.py            # Maps clean YAML keys → internal additional_context dict
+└── schema.py                # Config validation
+```
+
+### Data Flow
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        CLI Layer                                 │
+│                                                                  │
+│  --config file.yaml key=val    → config_args: List[str]         │
+│  --tags llama3                 → tags: List[str]                │
+│  --timeout 3600                → timeout: int                   │
+│  --additional-context '{...}'  → additional_context: str        │
+│                                                                  │
+└──────────────────────┬──────────────────────────────────────────┘
+                       │
+                       ▼
+┌─────────────────────────────────────────────────────────────────┐
+│               HydraConfigLoader.load(config_args)                │
+│                                                                  │
+│  1. Separate file path from Hydra overrides                     │
+│  2. initialize_config_dir("pkg://madengine.configs")            │
+│  3. compose(config_name="config", overrides=[...])              │
+│  4. If user YAML file: OmegaConf.merge(cfg, user_cfg)          │
+│  5. Return DictConfig                                           │
+│                                                                  │
+└──────────────────────┬──────────────────────────────────────────┘
+                       │
+                       ▼
+┌─────────────────────────────────────────────────────────────────┐
+│            ConfigTranslator.to_additional_context(cfg)           │
+│                                                                  │
+│  Maps clean YAML keys to internal additional_context format:    │
+│                                                                  │
+│    YAML Key                    → Internal Key                   │
+│    ─────────────────────────     ──────────────────────          │
+│    docker.build_args           → docker_build_arg               │
+│    docker.env_vars             → docker_env_vars                │
+│    docker.mounts               → docker_mounts                  │
+│    docker.gpus                 → docker_gpus                    │
+│    docker.cpus                 → docker_cpus                    │
+│    docker.additional_run_options → additional_docker_run_options │
+│    model.container_image       → MAD_CONTAINER_IMAGE            │
+│    log_error.pattern_scan      → log_error_pattern_scan         │
+│    log_error.benign_patterns   → log_error_benign_patterns      │
+│    log_error.patterns          → log_error_patterns             │
+│    runtime.*                   → (Context runtime settings)     │
+│                                                                  │
+│  Passthrough keys (no translation):                             │
+│    gpu_vendor, guest_os, env_vars, tools, pre_scripts,          │
+│    post_scripts, encapsulate_script, debug, slurm, k8s,         │
+│    distributed, vllm, sglang_disagg, shared_data                │
+│                                                                  │
+│  Extracted (not in additional_context):                          │
+│    model.tags → returned separately for orchestrator            │
+│    model.manifest_file → returned separately                    │
+│    model.timeout → returned separately                          │
+│    build.registry → returned separately                         │
+│    build.target_archs → returned separately                     │
+│                                                                  │
+│  Returns: (additional_context: dict, metadata: dict)            │
+│                                                                  │
+└──────────────────────┬──────────────────────────────────────────┘
+                       │
+                       ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                    Merge Layer                                    │
+│                                                                  │
+│  1. Start with translated config dict                           │
+│  2. CLI args override equivalent config keys:                   │
+│     --tags provided?    → overrides model.tags                  │
+│     --timeout provided? → overrides model.timeout               │
+│  3. --additional-context merged on top (highest priority)       │
+│  4. Result = final additional_context dict                      │
+│                                                                  │
+└──────────────────────┬──────────────────────────────────────────┘
+                       │
+                       ▼
+┌─────────────────────────────────────────────────────────────────┐
+│              Existing Pipeline (unchanged)                        │
+│                                                                  │
+│  BuildOrchestrator(args) → Context(repr(merged_dict))           │
+│  RunOrchestrator(args) → ContainerRunner / DeploymentFactory    │
+│                                                                  │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+### Merge Precedence (lowest → highest)
+
+1. **Config group defaults** — `config.yaml` defaults list
+2. **Selected config groups** — `scheduler=slurm`, `launcher=torchrun`
+3. **Appended config groups** — `+profile=mi300x_8gpu`, `+env=nccl_tuned`
+4. **User YAML file** — if `--config /path/to/file.yaml`
+5. **Inline Hydra overrides** — `distributed.nnodes=4`
+6. **CLI args** — `--tags`, `--timeout` (override equivalent config keys)
+7. **`--additional-context`** — highest priority (backward compatibility)
+
+### HydraConfigLoader
+
+```python
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import OmegaConf, DictConfig
+from pathlib import Path
+import importlib.resources
+
+class HydraConfigLoader:
+    """Loads madengine config using Hydra's Compose API."""
+
+    @staticmethod
+    def load(config_args: list[str]) -> DictConfig:
+        """Load and compose config from Hydra overrides and/or user YAML.
+
+        Args:
+            config_args: Mix of Hydra overrides and optional user YAML path.
+                Examples:
+                  ["scheduler=slurm", "launcher=torchrun", "distributed.nnodes=4"]
+                  ["/path/to/my_job.yaml"]
+                  ["/path/to/my_job.yaml", "distributed.nnodes=8"]
+
+        Returns:
+            Composed DictConfig with all merges applied.
+        """
+        user_file, overrides = HydraConfigLoader._parse_args(config_args)
+
+        # Resolve package config directory
+        config_dir = str(
+            importlib.resources.files("madengine") / "configs"
+        )
+
+        # Clear any previous Hydra state
+        GlobalHydra.instance().clear()
+
+        with initialize_config_dir(
+            config_dir=config_dir, version_base=None
+        ):
+            cfg = compose(config_name="config", overrides=overrides)
+
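+        # Merge user file on top if provided. NOTE(review): merging here lets the file override inline Hydra overrides, contrary to Merge Precedence items 4-5 — confirm intended order.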
+        if user_file:
+            user_cfg = OmegaConf.load(user_file)
+            OmegaConf.set_struct(cfg, False)
+            cfg = OmegaConf.merge(cfg, user_cfg)
+
+        return cfg
+
+    @staticmethod
+    def _parse_args(config_args: list[str]) -> tuple[str | None, list[str]]:
+        """Separate user YAML file path from Hydra overrides."""
+        user_file = None
+        overrides = []
+        for arg in config_args:
+            if (
+                arg.endswith(('.yaml', '.yml'))
+                and '=' not in arg
+                and not arg.startswith('+')
+            ):
+                if user_file:
+                    raise ConfigurationError(
+                        "Only one YAML config file allowed"
+                    )
+                user_file = arg
+            else:
+                overrides.append(arg)
+        return user_file, overrides
+```
+
+### ConfigTranslator
+
+```python
+class ConfigTranslator:
+    """Translates clean YAML config to internal additional_context format."""
+
+    # YAML key → internal key mapping (only for keys that differ)
+    KEY_MAP = {
+        "docker.build_args": "docker_build_arg",
+        "docker.env_vars": "docker_env_vars",
+        "docker.mounts": "docker_mounts",
+        "docker.gpus": "docker_gpus",
+        "docker.cpus": "docker_cpus",
+        "docker.additional_run_options": "additional_docker_run_options",
+        "log_error.pattern_scan": "log_error_pattern_scan",
+        "log_error.benign_patterns": "log_error_benign_patterns",
+        "log_error.patterns": "log_error_patterns",
+    }
+
+    # Keys extracted from config (not part of additional_context)
+    EXTRACTED_KEYS = {
+        "model", "build", "platform", "output",
+        "summary_output", "data_config", "live_output",
+    }
+
+    @classmethod
+    def to_additional_context(
+        cls, cfg: DictConfig
+    ) -> tuple[dict, dict]:
+        """Convert DictConfig to (additional_context, metadata) tuple.
+
+        additional_context: dict in the format expected by existing pipeline.
+        metadata: dict with model.tags, build.registry, etc. for the CLI layer.
+        """
+        raw = OmegaConf.to_container(cfg, resolve=True)
+
+        context = {}
+        metadata = {}
+
+        for key, value in raw.items():
+            if key in cls.EXTRACTED_KEYS:
+                metadata[key] = value
+            elif key == "docker":
+                # Flatten docker.* to docker_* keys
+                for subkey, subval in value.items():
+                    internal_key = cls.KEY_MAP.get(
+                        f"docker.{subkey}", f"docker_{subkey}"
+                    )
+                    if subval is not None:
+                        context[internal_key] = subval
+            elif key == "log_error":
+                for subkey, subval in value.items():
+                    internal_key = cls.KEY_MAP.get(
+                        f"log_error.{subkey}", f"log_error_{subkey}"
+                    )
+                    context[internal_key] = subval
+            elif key == "runtime":
+                # Runtime settings stored separately, applied to Context
+                metadata["runtime"] = value
+            else:
+                # Passthrough: gpu_vendor, guest_os, env_vars, slurm,
+                # k8s, distributed, tools, pre_scripts, etc.
+                if value is not None:
+                    context[key] = value
+
+        # Extract MAD_CONTAINER_IMAGE from model metadata
+        model = metadata.get("model", {})
+        if model and model.get("container_image"):
+            context["MAD_CONTAINER_IMAGE"] = model["container_image"]
+
+        return context, metadata
+```
+
+### Config Validation (`schema.py`)
+
+```python
+class ConfigValidator:
+    """Validates composed config for consistency."""
+
+    @staticmethod
+    def validate(cfg: DictConfig) -> list[str]:
+        """Return list of validation errors (empty = valid)."""
+        errors = []
+
+        # Cross-field: scheduler=slurm must have slurm section
+        scheduler = cfg.get("scheduler", {})
+        # (Hydra handles this via config group selection)
+
+        # Conflict: can't have both slurm and k8s
+        if cfg.get("slurm") and cfg.get("k8s"):
+            errors.append(
+                "Cannot specify both 'slurm' and 'k8s' sections"
+            )
+
+        # Distributed: if enabled, must have launcher
+        dist = cfg.get("distributed", {})
+        if dist.get("enabled") and not dist.get("launcher"):
+            errors.append(
+                "distributed.enabled=true requires distributed.launcher"
+            )
+
+        # Type checks
+        if dist.get("nnodes") is not None:
+            if not isinstance(dist["nnodes"], int) or dist["nnodes"] < 1:
+                errors.append("distributed.nnodes must be a positive integer")
+
+        # Warn on unknown top-level keys
+        known_keys = {
+            "defaults", "platform", "scheduler", "hardware", "launcher",
+            "model", "docker", "build", "env_vars", "debug", "live_output",
+            "log_error", "tools", "pre_scripts", "post_scripts",
+            "encapsulate_script", "data_config", "output", "summary_output",
+            "gpu_vendor", "guest_os", "runtime", "slurm", "k8s",
+            "kubernetes", "distributed", "vllm", "sglang_disagg",
+            "shared_data", "timeout",
+        }
+        for key in cfg:
+            if key not in known_keys:
+                errors.append(f"Unknown config key: '{key}'")
+
+        return errors
+```
+
+---
+
+## CLI Integration
+
+### Changes to `commands/run.py`
+
+```python
+def run(
+    tags: Annotated[...] = [],
+    # ... existing args ...
+    config: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--config",
+            help=(
+                "YAML config file and/or Hydra overrides. "
+                "Examples: --config my_job.yaml, "
+                "--config scheduler=slurm launcher=torchrun, "
+                "--config my_job.yaml distributed.nnodes=4"
+            ),
+        ),
+    ] = None,
+    additional_context: Annotated[...] = "{}",
+    # ... rest of existing args ...
+):
+    if config:
+        from madengine.config import load_config
+        config_ctx, config_meta = load_config(config)
+
+        # Extract model selection from config (CLI args override)
+        if not tags and config_meta.get("model", {}).get("tags"):
+            tags = config_meta["model"]["tags"]
+        if timeout == DEFAULT_TIMEOUT and config_meta.get("model", {}).get("timeout"):
+            timeout = config_meta["model"]["timeout"]
+        if not manifest_file and config_meta.get("model", {}).get("manifest_file"):
+            manifest_file = config_meta["model"]["manifest_file"]
+        if not registry and config_meta.get("build", {}).get("registry"):
+            registry = config_meta["build"]["registry"]
+
+        # Merge: config_ctx is base, additional_context overrides
+        parsed_ac = ast.literal_eval(additional_context) if additional_context != "{}" else {}
+        merged = deep_merge(config_ctx, parsed_ac)
+        additional_context = repr(merged)
+
+    # ... rest of existing run logic (unchanged) ...
+```
+
+### Changes to `commands/build.py`
+
+Same pattern: add `--config` parameter, extract build-relevant metadata, merge with `additional_context`.
+
+---
+
+## Usage Examples
+
+### Single-file workflow (most common)
+
+```yaml
+# my_slurm_training.yaml
+defaults:
+  - /scheduler: slurm
+  - /launcher: torchrun
+  - /hardware: amd
+  - _self_
+
+model:
+  tags: [megatron_llama3_70b]
+
+slurm:
+  partition: gpu-cluster
+  nodes: 4
+  gpus_per_node: 8
+  time: "48:00:00"
+  modules: [rocm/6.2.0]
+
+distributed:
+  nnodes: 4
+  nproc_per_node: 8
+
+env_vars:
+  NCCL_DEBUG: WARN
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+```
+
+```bash
+madengine run --config my_slurm_training.yaml
+```
+
+### Config groups + inline overrides (no file)
+
+```bash
+# SLURM multi-node with torchrun
+madengine run --config scheduler=slurm --config launcher=torchrun \
+  --config model.tags=[llama3] --config distributed.nnodes=4 --config slurm.partition=gpu-high
+
+# K8s vLLM inference with profiling
+madengine run --config scheduler=k8s --config launcher=vllm \
+  --config +tools=rocprofv3_lightweight --config k8s.namespace=ml-inference \
+  --config model.tags=[vllm_llama]
+
+# Local single-GPU (all defaults, just select model)
+madengine run --config model.tags=[dummy]
+```
+
+### File + overrides
+
+```bash
+# Base config from file, override node count
+madengine run --config my_slurm_training.yaml --config distributed.nnodes=8
+
+# Base config + add profiling tools
+madengine run --config my_slurm_training.yaml --config +tools=power_profiler
+```
+
+### Backward compatible
+
+```bash
+# --additional-context still works, overrides --config
+madengine run --config my_slurm_training.yaml \
+  --additional-context '{"slurm": {"partition": "override-partition"}}'
+
+# Pure --additional-context (no --config) still works exactly as before
+madengine run --tags dummy -c '{"gpu_vendor": "AMD"}'
+```
+
+### Future: bare metal
+
+```bash
+# No Docker — direct execution on host
+madengine run --config platform=bare_metal --config scheduler=slurm \
+  --config launcher=torchrun --config model.tags=[benchmark]
+```
+
+---
+
+## Migration Path
+
+### Phase 1: Add --config alongside --additional-context
+- Both coexist; `--additional-context` has highest priority
+- Existing JSON example configs can be converted to YAML (1:1 mapping via translator)
+- No breaking changes
+
+### Phase 2: Convert existing JSON presets to YAML configs
+- `deployment/presets/k8s/defaults.json` → `configs/scheduler/k8s.yaml`
+- `deployment/presets/slurm/defaults.json` → `configs/scheduler/slurm.yaml`
+- `deployment/presets/k8s/profiles/` → `configs/profile/` YAML files
+- `examples/profiling-configs/*.json` → `configs/tools/` YAML files
+- `examples/k8s-configs/*.json` → example YAML files in `examples/`
+
+### Phase 3: Deprecate --additional-context (future)
+- Emit deprecation warning when `--additional-context` is used
+- Eventually remove in a major version
+
+---
+
+## Dependencies
+
+Add to `pyproject.toml`:
+
+```toml
+dependencies = [
+    # ... existing ...
+    "hydra-core>=1.3",
+    "omegaconf>=2.3",
+]
+```
+
+Both are pure Python with minimal transitive dependencies. `omegaconf` is already a dependency of `hydra-core`.
+
+---
+
+## Testing Strategy
+
+### Unit Tests
+
+- `test_config_loader.py`: HydraConfigLoader with various override combinations
+- `test_config_translator.py`: ConfigTranslator key mapping, passthrough, extraction
+- `test_config_schema.py`: Validation rules (conflicts, unknown keys, type checks)
+- `test_merge.py`: Merge precedence (config < CLI < additional_context)
+
+### Integration Tests
+
+- End-to-end: `--config scheduler=slurm` produces correct `additional_context`
+- File + overrides: `--config file.yaml --config key=value` merges correctly
+- Backward compat: `--additional-context` without `--config` unchanged
+- Both: `--config` + `--additional-context` merges with correct precedence
+
+### Fixture Configs
+
+- Add YAML equivalents of existing test fixture JSON files
+- Test each config group individually and in combination
+
+---
+
+## Files to Create
+
+| File | Purpose |
+|------|---------|
+| `src/madengine/config/__init__.py` | Public API |
+| `src/madengine/config/loader.py` | HydraConfigLoader |
+| `src/madengine/config/translator.py` | ConfigTranslator |
+| `src/madengine/config/schema.py` | ConfigValidator |
+| `src/madengine/configs/config.yaml` | Root config |
+| `src/madengine/configs/platform/*.yaml` | Platform configs |
+| `src/madengine/configs/scheduler/*.yaml` | Scheduler configs |
+| `src/madengine/configs/hardware/*.yaml` | Hardware configs |
+| `src/madengine/configs/launcher/*.yaml` | Launcher configs |
+| `src/madengine/configs/profile/*.yaml` | Hardware profiles |
+| `src/madengine/configs/env/*.yaml` | Env var presets |
+| `src/madengine/configs/tools/*.yaml` | Profiling tool configs |
+| `src/madengine/configs/data/*.yaml` | Data provider configs |
+| `src/madengine/configs/build/*.yaml` | Build setting configs |
+| `tests/unit/test_config_loader.py` | Loader tests |
+| `tests/unit/test_config_translator.py` | Translator tests |
+| `tests/unit/test_config_schema.py` | Validation tests |
+
+## Files to Modify
+
+| File | Change |
+|------|--------|
+| `pyproject.toml` | Add hydra-core, omegaconf dependencies |
+| `src/madengine/cli/commands/run.py` | Add `--config` parameter, integration logic |
+| `src/madengine/cli/commands/build.py` | Add `--config` parameter, integration logic |
diff --git a/docs/usage.md b/docs/usage.md
index 010a6b6c..b8570ac5 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -27,12 +27,16 @@ madengine discover --tags dummy
 # Run locally (full workflow: discover/build/run as configured by the model)
 madengine run --tags dummy
 
-# Or with explicit configuration
+# Or with explicit JSON configuration
 madengine run --tags dummy \
   --additional-context '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}'
+
+# Or with YAML config (composable, Hydra-based)
+madengine run --tags dummy --config scheduler=slurm --config launcher=torchrun
+madengine run --config my_job.yaml
 ```
 
-> **Note**: `gpu_vendor` defaults to `AMD` and `guest_os` defaults to `UBUNTU` for build operations. For production or non-AMD/Ubuntu environments, specify these values explicitly.
+> **Note**: `--config` is mutually exclusive with `--additional-context` / `--additional-context-file`. `gpu_vendor` defaults to `AMD` and `guest_os` defaults to `UBUNTU` for build operations.
 
 Results are saved to `perf_entry.csv`.
 
@@ -395,6 +399,8 @@ Deployment target is automatically detected from `slurm` key in configuration. T
 
 Use configuration files for complex settings:
 
+**JSON format** (`--additional-context-file`):
+
 **config.json:**
 ```json
 {
@@ -412,6 +418,42 @@ Use configuration files for complex settings:
 madengine run --tags model --additional-context-file config.json
 ```
 
+**YAML format** (`--config`):
+
+**my_job.yaml:**
+```yaml
+model:
+  tags: [my_model]
+  timeout: 3600
+
+debug: true
+
+env_vars:
+  PYTORCH_TUNABLEOP_ENABLED: "1"
+  HSA_ENABLE_SDMA: "0"
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 4
+```
+
+```bash
+madengine run --config my_job.yaml
+
+# With additional overrides
+madengine run --config my_job.yaml --config distributed.nnodes=4
+
+# Or use config groups without a file
+madengine run --tags model \
+  --config scheduler=slurm \
+  --config launcher=torchrun \
+  --config +profile=mi300x_8gpu
+```
+
+> `--config` is mutually exclusive with `--additional-context` / `--additional-context-file`. See [Configuration Guide — YAML Configuration](configuration.md#yaml-configuration-config) for config groups and full details, and [`examples/configs/`](../examples/configs/) for annotated templates and ready-to-run demos.
+
 ### Custom Timeouts
 
 ```bash
diff --git a/examples/build-manifest/batch.json b/examples/build-manifest/batch.json
index 8996e43b..16f66d77 100644
--- a/examples/build-manifest/batch.json
+++ b/examples/build-manifest/batch.json
@@ -21,4 +21,3 @@
     "build_new": false
   }
 ]
-
diff --git a/examples/build-manifest/ci_incremental.json b/examples/build-manifest/ci_incremental.json
index af83ee86..715a6aed 100644
--- a/examples/build-manifest/ci_incremental.json
+++ b/examples/build-manifest/ci_incremental.json
@@ -20,4 +20,3 @@
     "build_new": false
   }
 ]
-
diff --git a/examples/configs/README.md b/examples/configs/README.md
new file mode 100644
index 00000000..8c4e1b80
--- /dev/null
+++ b/examples/configs/README.md
@@ -0,0 +1,95 @@
+# YAML Config Examples (`--config`)
+
+```
+configs/
+├── templates/   # Full reference — every field shown and annotated
+│   ├── local.yaml
+│   ├── slurm.yaml
+│   └── k8s.yaml
+└── demo/        # Minimal ready-to-run examples organised by target
+    ├── local/
+    ├── slurm/
+    └── k8s/
+```
+
+## Workflow
+
+**Starting from scratch** — copy a template, fill in your model tag and cluster
+settings, then delete the sections you don't need:
+
+```bash
+cp examples/configs/templates/slurm.yaml my_job.yaml
+# edit my_job.yaml …
+madengine run --config my_job.yaml
+```
+
+**Starting from an example** — find the demo closest to your use case and
+adapt it:
+
+```bash
+cp examples/configs/demo/slurm/multi-node-torchrun.yaml my_job.yaml
+# tweak partition, node count, tags …
+madengine run --config my_job.yaml
+```
+
+**Inline overrides** — any field can be overridden without editing the file:
+
+```bash
+madengine run --config my_job.yaml --config distributed.nnodes=4
+madengine run --config my_job.yaml --config +env=nccl_debug
+madengine run --config my_job.yaml --config +tools=rocprofv3_lightweight
+```
+
+> `--config` is mutually exclusive with `--additional-context` /
+> `--additional-context-file`. See `docs/configuration.md` for the full
+> field reference.
+
+---
+
+## `templates/`
+
+| File | Target | Contents |
+|------|--------|----------|
+| `local.yaml` | Local Docker | All docker, model, tools, scripts, log-error, output fields |
+| `slurm.yaml` | SLURM | All slurm, distributed, env_vars, tools, scripts fields |
+| `k8s.yaml` | Kubernetes | All k8s, distributed, env_vars, tools, secrets, storage fields |
+
+## `demo/local/`
+
+| File | Model | Description |
+|------|-------|-------------|
+| `single-gpu.yaml` | `dummy` | Single GPU, no distribution |
+| `multi-gpu-torchrun.yaml` | `dummy_torchrun` | Single node, 4 GPUs, torchrun |
+| `deepspeed.yaml` | `dummy_deepspeed` | DeepSpeed ZeRO, single node |
+| `vllm-inference.yaml` | `dummy_vllm` | vLLM tensor parallelism, 4 GPUs |
+| `profiling.yaml` | `dummy` | ROCprofv3 + power + VRAM profiling |
+
+## `demo/slurm/`
+
+| File | Model | Description |
+|------|-------|-------------|
+| `single-node-single-gpu.yaml` | `dummy` | Single GPU job |
+| `multi-node-torchrun.yaml` | `dummy_torchrun` | 2 nodes × 8 GPUs, Ethernet |
+| `multi-node-torchrun-infiniband.yaml` | `dummy_torchrun` | 4 nodes × 8 GPUs, InfiniBand, account/QoS |
+| `deepspeed.yaml` | `dummy_deepspeed` | DeepSpeed, single node |
+| `megatron-lm.yaml` | `dummy_megatron_lm` | Megatron-LM, 4 nodes × 8 GPUs |
+| `torchtitan.yaml` | `dummy_torchtitan` | TorchTitan TP+PP+FSDP2, 4 nodes × 8 GPUs |
+| `vllm-inference.yaml` | `dummy_vllm` | vLLM data parallelism, 2 nodes × 4 GPUs |
+| `sglang-inference.yaml` | `dummy_sglang` | SGLang, 2 nodes × 4 GPUs |
+| `sglang-disagg.yaml` | `dummy_sglang_disagg` | SGLang disaggregated prefill/decode, 5 nodes |
+| `profiling-multi-gpu.yaml` | `dummy_torchrun` | torchrun + RCCL + power + VRAM profiling |
+
+## `demo/k8s/`
+
+| File | Model | Description |
+|------|-------|-------------|
+| `single-gpu.yaml` | `dummy` | Single GPU pod |
+| `multi-gpu-torchrun.yaml` | `dummy_torchrun` | 1 pod × 8 GPUs, torchrun |
+| `multi-node-torchrun.yaml` | `dummy_torchrun` | 2 pods × 8 GPUs, node selector |
+| `nvidia-gpu.yaml` | `dummy_torchrun` | NVIDIA A100/H100, `nvidia.com/gpu` |
+| `deepspeed.yaml` | `dummy_deepspeed` | DeepSpeed, single pod |
+| `megatron-lm.yaml` | `dummy_megatron_lm` | Megatron-LM, 4 pods × 8 GPUs |
+| `torchtitan.yaml` | `dummy_torchtitan` | TorchTitan TP+PP+FSDP2, 4 pods × 8 GPUs |
+| `vllm-inference.yaml` | `dummy_vllm` | vLLM data parallelism, 2 pods × 4 GPUs |
+| `sglang-inference.yaml` | `dummy_sglang` | SGLang, 2 pods × 4 GPUs |
+| `sglang-disagg.yaml` | `dummy_sglang_disagg` | SGLang disaggregated, 5 pods |
diff --git a/examples/configs/demo/k8s/deepspeed.yaml b/examples/configs/demo/k8s/deepspeed.yaml
new file mode 100644
index 00000000..fce15b80
--- /dev/null
+++ b/examples/configs/demo/k8s/deepspeed.yaml
@@ -0,0 +1,23 @@
+# Kubernetes — DeepSpeed ZeRO distributed training
+# madengine run --config examples/configs/demo/k8s/deepspeed.yaml
+
+model:
+  tags: [dummy_deepspeed]
+
+k8s:
+  namespace: default
+  gpu_count: 4
+  memory: 128Gi
+  memory_limit: 256Gi
+  cpu: "32"
+  cpu_limit: "64"
+  host_ipc: true
+
+distributed:
+  enabled: true
+  launcher: deepspeed
+  nnodes: 1
+  nproc_per_node: 4
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/k8s/megatron-lm.yaml b/examples/configs/demo/k8s/megatron-lm.yaml
new file mode 100644
index 00000000..c315963a
--- /dev/null
+++ b/examples/configs/demo/k8s/megatron-lm.yaml
@@ -0,0 +1,28 @@
+# Kubernetes — Megatron-LM large-scale transformer training (4 pods × 8 GPUs)
+# madengine run --config examples/configs/demo/k8s/megatron-lm.yaml
+
+model:
+  tags: [dummy_megatron_lm]
+
+k8s:
+  namespace: ml-training
+  gpu_count: 8
+  memory: 256Gi
+  memory_limit: 512Gi
+  cpu: "64"
+  cpu_limit: "128"
+  host_ipc: true
+  image_pull_policy: IfNotPresent
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: megatron
+  nnodes: 4
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "16"
+  NCCL_DEBUG: INFO
diff --git a/examples/configs/demo/k8s/multi-gpu-torchrun.yaml b/examples/configs/demo/k8s/multi-gpu-torchrun.yaml
new file mode 100644
index 00000000..03e68264
--- /dev/null
+++ b/examples/configs/demo/k8s/multi-gpu-torchrun.yaml
@@ -0,0 +1,34 @@
+# Kubernetes — single-node multi-GPU with torchrun (1 pod × 8 GPUs)
+# madengine run --config examples/configs/demo/k8s/multi-gpu-torchrun.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+k8s:
+  namespace: default
+  gpu_count: 8
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+  host_ipc: true
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
+  NCCL_IB_DISABLE: "1"
+  NCCL_SOCKET_IFNAME: eth0
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
diff --git a/examples/configs/demo/k8s/multi-node-torchrun.yaml b/examples/configs/demo/k8s/multi-node-torchrun.yaml
new file mode 100644
index 00000000..f1789f2a
--- /dev/null
+++ b/examples/configs/demo/k8s/multi-node-torchrun.yaml
@@ -0,0 +1,39 @@
+# Kubernetes — multi-node torchrun (2 pods × 8 GPUs = 16 GPUs total)
+# madengine run --config examples/configs/demo/k8s/multi-node-torchrun.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+k8s:
+  namespace: default
+  gpu_count: 8
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+  host_ipc: true
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
+  NCCL_DEBUG_SUBSYS: "INIT,NET"
+  NCCL_IB_DISABLE: "1"
+  NCCL_SOCKET_IFNAME: eth0
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+  NCCL_TIMEOUT: "600"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
diff --git a/examples/configs/demo/k8s/nvidia-gpu.yaml b/examples/configs/demo/k8s/nvidia-gpu.yaml
new file mode 100644
index 00000000..4e518dee
--- /dev/null
+++ b/examples/configs/demo/k8s/nvidia-gpu.yaml
@@ -0,0 +1,32 @@
+# Kubernetes — NVIDIA GPU cluster (A100/H100), single node × 4 GPUs
+# madengine run --config examples/configs/demo/k8s/nvidia-gpu.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+
+k8s:
+  namespace: default
+  gpu_count: 4
+  gpu_resource_name: nvidia.com/gpu
+  memory: 128Gi
+  memory_limit: 256Gi
+  cpu: "48"
+  cpu_limit: "96"
+  node_selector:
+    accelerator: nvidia-tesla-a100
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 1
+  nproc_per_node: 4
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "12"
+  NCCL_DEBUG: WARN
+  NCCL_IB_DISABLE: "1"
+  NCCL_SOCKET_IFNAME: eth0
diff --git a/examples/configs/demo/k8s/sglang-disagg.yaml b/examples/configs/demo/k8s/sglang-disagg.yaml
new file mode 100644
index 00000000..f9ec614f
--- /dev/null
+++ b/examples/configs/demo/k8s/sglang-disagg.yaml
@@ -0,0 +1,38 @@
+# Kubernetes — SGLang disaggregated inference (5 pods: 1 proxy + 2 prefill + 2 decode)
+# madengine run --config examples/configs/demo/k8s/sglang-disagg.yaml
+#
+# To customize the prefill/decode split, set distributed.sglang_disagg.prefill_nodes
+# and distributed.sglang_disagg.decode_nodes (must sum to nnodes - 1 proxy node).
+
+model:
+  tags: [dummy_sglang_disagg]
+
+k8s:
+  namespace: default
+  gpu_count: 8
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+  host_ipc: true
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: sglang-disagg
+  nnodes: 5
+  nproc_per_node: 8
+  master_port: 29500
+  sglang_disagg:
+    prefill_nodes: 2
+    decode_nodes: 2
+
+env_vars:
+  SGLANG_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  SGLANG_ENABLE_RADIX_CACHE: "1"
+  SGLANG_RADIX_CACHE_SIZE: "0.9"
+  SGLANG_DISAGG_TRANSFER_BACKEND: mooncake
+  NCCL_TIMEOUT: "600"
+  RAY_health_check_timeout_ms: "60000"
+  MOONCAKE_TEST_MODE: "0"
diff --git a/examples/configs/demo/k8s/sglang-inference.yaml b/examples/configs/demo/k8s/sglang-inference.yaml
new file mode 100644
index 00000000..62c4579f
--- /dev/null
+++ b/examples/configs/demo/k8s/sglang-inference.yaml
@@ -0,0 +1,30 @@
+# Kubernetes — SGLang inference, tensor + data parallelism (2 pods × 4 GPUs)
+# madengine run --config examples/configs/demo/k8s/sglang-inference.yaml
+
+model:
+  tags: [dummy_sglang]
+
+k8s:
+  namespace: default
+  gpu_count: 4
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+  host_ipc: true
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: sglang
+  nnodes: 2
+  nproc_per_node: 4
+  master_port: 29500
+
+env_vars:
+  SGLANG_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  SGLANG_ENABLE_RADIX_CACHE: "1"
+  SGLANG_RADIX_CACHE_SIZE: "0.9"
+  NCCL_TIMEOUT: "600"
+  RAY_health_check_timeout_ms: "60000"
diff --git a/examples/configs/demo/k8s/single-gpu.yaml b/examples/configs/demo/k8s/single-gpu.yaml
new file mode 100644
index 00000000..be528719
--- /dev/null
+++ b/examples/configs/demo/k8s/single-gpu.yaml
@@ -0,0 +1,16 @@
+# Kubernetes — single GPU pod (no distribution)
+# madengine run --config examples/configs/demo/k8s/single-gpu.yaml
+
+model:
+  tags: [dummy]
+
+k8s:
+  namespace: default
+  gpu_count: 1
+  memory: 16Gi
+  memory_limit: 32Gi
+  cpu: "8"
+  cpu_limit: "16"
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/k8s/torchtitan.yaml b/examples/configs/demo/k8s/torchtitan.yaml
new file mode 100644
index 00000000..3268915e
--- /dev/null
+++ b/examples/configs/demo/k8s/torchtitan.yaml
@@ -0,0 +1,28 @@
+# Kubernetes — TorchTitan LLM pre-training with TP + PP + FSDP2 (4 pods × 8 GPUs)
+# madengine run --config examples/configs/demo/k8s/torchtitan.yaml
+
+model:
+  tags: [dummy_torchtitan]
+
+k8s:
+  namespace: ml-training
+  gpu_count: 8
+  memory: 512Gi
+  memory_limit: 768Gi
+  cpu: "96"
+  cpu_limit: "128"
+  host_ipc: true
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: torchtitan
+  nnodes: 4
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  PYTORCH_TUNABLEOP_ENABLED: "1"
+  PYTORCH_TUNABLEOP_TUNING: "1"
+  NCCL_DEBUG: INFO
diff --git a/examples/configs/demo/k8s/vllm-inference.yaml b/examples/configs/demo/k8s/vllm-inference.yaml
new file mode 100644
index 00000000..f9a44905
--- /dev/null
+++ b/examples/configs/demo/k8s/vllm-inference.yaml
@@ -0,0 +1,31 @@
+# Kubernetes — vLLM inference, data parallelism (2 pods × 4 GPUs, one replica per pod)
+# madengine run --config examples/configs/demo/k8s/vllm-inference.yaml
+
+model:
+  tags: [dummy_vllm]
+
+k8s:
+  namespace: default
+  gpu_count: 4
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+  host_ipc: true
+  node_selector:
+    feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 2
+  nproc_per_node: 4
+  master_port: 29500
+
+env_vars:
+  VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  VLLM_WORKER_MULTIPROC_METHOD: spawn
+  VLLM_KV_CACHE_SIZE: "0.7"
+  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
+  NCCL_TIMEOUT: "600"
+  RAY_health_check_timeout_ms: "60000"
diff --git a/examples/configs/demo/local/deepspeed.yaml b/examples/configs/demo/local/deepspeed.yaml
new file mode 100644
index 00000000..5a3bcd15
--- /dev/null
+++ b/examples/configs/demo/local/deepspeed.yaml
@@ -0,0 +1,14 @@
+# Local Docker — DeepSpeed ZeRO distributed training
+# madengine run --config examples/configs/demo/local/deepspeed.yaml
+
+model:
+  tags: [dummy_deepspeed]
+
+distributed:
+  enabled: true
+  launcher: deepspeed
+  nnodes: 1
+  nproc_per_node: 4
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/local/multi-gpu-torchrun.yaml b/examples/configs/demo/local/multi-gpu-torchrun.yaml
new file mode 100644
index 00000000..fe2d5890
--- /dev/null
+++ b/examples/configs/demo/local/multi-gpu-torchrun.yaml
@@ -0,0 +1,16 @@
+# Local Docker — single node, multi-GPU with torchrun
+# madengine run --config examples/configs/demo/local/multi-gpu-torchrun.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 1
+  nproc_per_node: 4
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
diff --git a/examples/configs/demo/local/profiling.yaml b/examples/configs/demo/local/profiling.yaml
new file mode 100644
index 00000000..37f5ae30
--- /dev/null
+++ b/examples/configs/demo/local/profiling.yaml
@@ -0,0 +1,19 @@
+# Local Docker — single GPU with ROCm profiling
+# madengine run --config examples/configs/demo/local/profiling.yaml
+
+model:
+  tags: [dummy]
+
+tools:
+  - name: rocprofv3_lightweight
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/local/single-gpu.yaml b/examples/configs/demo/local/single-gpu.yaml
new file mode 100644
index 00000000..b2a8e39b
--- /dev/null
+++ b/examples/configs/demo/local/single-gpu.yaml
@@ -0,0 +1,8 @@
+# Local Docker — single GPU, no distribution
+# madengine run --config examples/configs/demo/local/single-gpu.yaml
+
+model:
+  tags: [dummy]
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/local/vllm-inference.yaml b/examples/configs/demo/local/vllm-inference.yaml
new file mode 100644
index 00000000..86e207ea
--- /dev/null
+++ b/examples/configs/demo/local/vllm-inference.yaml
@@ -0,0 +1,16 @@
+# Local Docker — vLLM inference with tensor parallelism
+# madengine run --config examples/configs/demo/local/vllm-inference.yaml
+
+model:
+  tags: [dummy_vllm]
+
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 1
+  nproc_per_node: 4
+
+env_vars:
+  VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  VLLM_WORKER_MULTIPROC_METHOD: spawn
+  VLLM_KV_CACHE_SIZE: "0.7"
diff --git a/examples/configs/demo/slurm/deepspeed.yaml b/examples/configs/demo/slurm/deepspeed.yaml
new file mode 100644
index 00000000..01c6c82a
--- /dev/null
+++ b/examples/configs/demo/slurm/deepspeed.yaml
@@ -0,0 +1,21 @@
+# SLURM — DeepSpeed ZeRO distributed training (single node)
+# madengine run --config examples/configs/demo/slurm/deepspeed.yaml
+
+model:
+  tags: [dummy_deepspeed]
+
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 4
+  time: "02:00:00"
+  output_dir: ./slurm_results
+
+distributed:
+  enabled: true
+  launcher: deepspeed
+  nnodes: 1
+  nproc_per_node: 4
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/slurm/megatron-lm.yaml b/examples/configs/demo/slurm/megatron-lm.yaml
new file mode 100644
index 00000000..1ebc2e86
--- /dev/null
+++ b/examples/configs/demo/slurm/megatron-lm.yaml
@@ -0,0 +1,26 @@
+# SLURM — Megatron-LM large-scale transformer training (4 nodes × 8 GPUs)
+# madengine run --config examples/configs/demo/slurm/megatron-lm.yaml
+
+model:
+  tags: [dummy_megatron_lm]
+
+slurm:
+  partition: gpu
+  account: research
+  nodes: 4
+  gpus_per_node: 8
+  time: "24:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+
+distributed:
+  enabled: true
+  launcher: megatron
+  nnodes: 4
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "16"
+  NCCL_DEBUG: INFO
+  NCCL_IB_DISABLE: "0"
diff --git a/examples/configs/demo/slurm/multi-node-torchrun-infiniband.yaml b/examples/configs/demo/slurm/multi-node-torchrun-infiniband.yaml
new file mode 100644
index 00000000..13350684
--- /dev/null
+++ b/examples/configs/demo/slurm/multi-node-torchrun-infiniband.yaml
@@ -0,0 +1,40 @@
+# SLURM — multi-node torchrun over InfiniBand (4 nodes × 8 GPUs)
+# madengine run --config examples/configs/demo/slurm/multi-node-torchrun-infiniband.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+slurm:
+  partition: amd-rccl
+  nodes: 4
+  gpus_per_node: 8
+  time: "48:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  account: my-project
+  qos: high
+  network_interface: ib0
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 4
+  nproc_per_node: 8
+  backend: nccl
+  port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "16"
+  NCCL_DEBUG: WARN
+  NCCL_TIMEOUT: "1200"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
+  NCCL_IB_DISABLE: "0"
+  NCCL_IB_HCA: "mlx5_0:1,mlx5_1:1"
+  NCCL_SOCKET_IFNAME: ib0
diff --git a/examples/configs/demo/slurm/multi-node-torchrun.yaml b/examples/configs/demo/slurm/multi-node-torchrun.yaml
new file mode 100644
index 00000000..e035474e
--- /dev/null
+++ b/examples/configs/demo/slurm/multi-node-torchrun.yaml
@@ -0,0 +1,36 @@
+# SLURM — multi-node torchrun (2 nodes × 8 GPUs)
+# madengine run --config examples/configs/demo/slurm/multi-node-torchrun.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+slurm:
+  partition: amd-rccl
+  nodes: 2
+  gpus_per_node: 8
+  time: "24:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 8
+  backend: nccl
+  port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
+  NCCL_TIMEOUT: "600"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
+  NCCL_IB_DISABLE: "1"
+  NCCL_SOCKET_IFNAME: eth0
diff --git a/examples/configs/demo/slurm/profiling-multi-gpu.yaml b/examples/configs/demo/slurm/profiling-multi-gpu.yaml
new file mode 100644
index 00000000..2ae4b8a4
--- /dev/null
+++ b/examples/configs/demo/slurm/profiling-multi-gpu.yaml
@@ -0,0 +1,36 @@
+# SLURM — multi-GPU run with RCCL communication + power + VRAM profiling
+# madengine run --config examples/configs/demo/slurm/profiling-multi-gpu.yaml
+
+model:
+  tags: [dummy_torchrun]
+
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 4
+  time: "02:00:00"
+  output_dir: ./slurm_results
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 1
+  nproc_per_node: 4
+
+tools:
+  - name: rocprofv3_lightweight
+  - name: rocprofv3_communication
+    env_vars:
+      RCCL_DEBUG: INFO
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
diff --git a/examples/configs/demo/slurm/sglang-disagg.yaml b/examples/configs/demo/slurm/sglang-disagg.yaml
new file mode 100644
index 00000000..9a17d3ad
--- /dev/null
+++ b/examples/configs/demo/slurm/sglang-disagg.yaml
@@ -0,0 +1,41 @@
+# SLURM — SGLang disaggregated inference (5 nodes: 1 proxy + 2 prefill + 2 decode)
+# madengine run --config examples/configs/demo/slurm/sglang-disagg.yaml
+#
+# To customize the prefill/decode split, set distributed.sglang_disagg.prefill_nodes
+# and distributed.sglang_disagg.decode_nodes; they must sum to nnodes - 1 (one node is the proxy).
+
+model:
+  tags: [dummy_sglang_disagg]
+
+slurm:
+  partition: amd-rccl
+  nodes: 5
+  gpus_per_node: 8
+  time: "04:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+
+distributed:
+  enabled: true
+  launcher: sglang-disagg
+  nnodes: 5
+  nproc_per_node: 8
+  backend: nccl
+  port: 29500
+  sglang_disagg:
+    prefill_nodes: 2
+    decode_nodes: 2
+
+env_vars:
+  SGLANG_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  SGLANG_ENABLE_RADIX_CACHE: "1"
+  SGLANG_RADIX_CACHE_SIZE: "0.9"
+  SGLANG_DISAGG_TRANSFER_BACKEND: mooncake
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  HSA_ENABLE_SDMA: "0"
+  GPU_MAX_HW_QUEUES: "2"
+  NCCL_DEBUG: WARN
+  NCCL_IB_DISABLE: "0"
+  NCCL_IB_HCA: mlx5_0
+  NCCL_SOCKET_IFNAME: ib0
+  RAY_DEDUP_LOGS: "1"
diff --git a/examples/configs/demo/slurm/sglang-inference.yaml b/examples/configs/demo/slurm/sglang-inference.yaml
new file mode 100644
index 00000000..0ddbc5f2
--- /dev/null
+++ b/examples/configs/demo/slurm/sglang-inference.yaml
@@ -0,0 +1,32 @@
+# SLURM — SGLang inference, tensor + data parallelism (2 nodes × 4 GPUs)
+# madengine run --config examples/configs/demo/slurm/sglang-inference.yaml
+
+model:
+  tags: [dummy_sglang]
+
+slurm:
+  partition: amd-rccl
+  nodes: 2
+  gpus_per_node: 4
+  time: "04:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+
+distributed:
+  enabled: true
+  launcher: sglang
+  nnodes: 2
+  nproc_per_node: 4
+  backend: nccl
+  port: 29500
+
+env_vars:
+  SGLANG_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  SGLANG_ENABLE_RADIX_CACHE: "1"
+  SGLANG_RADIX_CACHE_SIZE: "0.9"
+  SGLANG_LOGGING_LEVEL: INFO
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  HSA_ENABLE_SDMA: "0"
+  GPU_MAX_HW_QUEUES: "2"
+  NCCL_DEBUG: WARN
+  RAY_DEDUP_LOGS: "1"
diff --git a/examples/configs/demo/slurm/single-node-single-gpu.yaml b/examples/configs/demo/slurm/single-node-single-gpu.yaml
new file mode 100644
index 00000000..29bf1544
--- /dev/null
+++ b/examples/configs/demo/slurm/single-node-single-gpu.yaml
@@ -0,0 +1,15 @@
+# SLURM — single node, single GPU (no distribution)
+# madengine run --config examples/configs/demo/slurm/single-node-single-gpu.yaml
+
+model:
+  tags: [dummy]
+
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 1
+  time: "01:00:00"
+  output_dir: ./slurm_results
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/examples/configs/demo/slurm/torchtitan.yaml b/examples/configs/demo/slurm/torchtitan.yaml
new file mode 100644
index 00000000..eb9233f1
--- /dev/null
+++ b/examples/configs/demo/slurm/torchtitan.yaml
@@ -0,0 +1,26 @@
+# SLURM — TorchTitan LLM pre-training with TP + PP + FSDP2 (4 nodes × 8 GPUs)
+# madengine run --config examples/configs/demo/slurm/torchtitan.yaml
+
+model:
+  tags: [dummy_torchtitan]
+
+slurm:
+  partition: amd-rccl
+  nodes: 4
+  gpus_per_node: 8
+  time: "72:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  mem: 512G
+  constraint: MI300X
+
+distributed:
+  enabled: true
+  launcher: torchtitan
+  nnodes: 4
+  nproc_per_node: 8
+  master_port: 29500
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
diff --git a/examples/configs/demo/slurm/vllm-inference.yaml b/examples/configs/demo/slurm/vllm-inference.yaml
new file mode 100644
index 00000000..3d1dcd72
--- /dev/null
+++ b/examples/configs/demo/slurm/vllm-inference.yaml
@@ -0,0 +1,31 @@
+# SLURM — vLLM inference, data parallelism (2 nodes × 4 GPUs, one replica per node)
+# madengine run --config examples/configs/demo/slurm/vllm-inference.yaml
+
+model:
+  tags: [dummy_vllm]
+
+slurm:
+  partition: amd-rccl
+  nodes: 2
+  gpus_per_node: 4
+  time: "04:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  enable_node_check: true
+
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 2
+  nproc_per_node: 4
+  backend: nccl
+  port: 29500
+
+env_vars:
+  VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
+  VLLM_WORKER_MULTIPROC_METHOD: spawn
+  VLLM_KV_CACHE_SIZE: "0.8"
+  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  NCCL_TIMEOUT: "300"
+  NCCL_DEBUG: WARN
diff --git a/examples/configs/templates/k8s.yaml b/examples/configs/templates/k8s.yaml
new file mode 100644
index 00000000..c6dc95ec
--- /dev/null
+++ b/examples/configs/templates/k8s.yaml
@@ -0,0 +1,180 @@
+# ============================================================
+# madengine YAML Config Template — Kubernetes
+# ============================================================
+# Copy this file, fill in your model and cluster details, and
+# remove or comment out any sections you don't need.
+#
+# Usage:
+#   madengine run --config k8s.yaml
+#   madengine run --config k8s.yaml --config distributed.nnodes=4
+#
+# Target is inferred automatically from the presence of the
+# 'k8s' (or 'kubernetes') key — no explicit deploy field is needed.
+# ============================================================
+
+# ------------------------------------
+# Model selection
+# ------------------------------------
+model:
+  # Run by tag — discovers matching models from models.json
+  tags: [dummy_torchrun]
+
+  # OR point at a pre-built manifest
+  # manifest_file: build_manifest.json
+
+  # OR use a specific container image directly
+  # container_image: myrepo/myimage:latest
+
+  skip_run: false
+  # timeout: 86400
+
+# ------------------------------------
+# Kubernetes job settings
+# ------------------------------------
+k8s:
+  kubeconfig: ~/.kube/config   # Path to kubeconfig file
+  namespace: default            # Kubernetes namespace for the job
+
+  # GPU resources
+  gpu_count: 8                         # GPUs per pod
+  gpu_resource_name: amd.com/gpu       # Resource name (nvidia.com/gpu for NVIDIA)
+
+  # CPU and memory per pod
+  memory: 256Gi
+  memory_limit: 384Gi
+  cpu: "64"
+  cpu_limit: "96"
+
+  # Pod lifecycle
+  image_pull_policy: Always   # Always | IfNotPresent | Never
+  backoff_limit: 3            # Retry attempts on pod failure
+  # ttl_seconds_after_finished: 3600  # Auto-delete job N seconds after completion
+
+  # IPC namespace sharing (required for multi-GPU NCCL/RCCL)
+  host_ipc: true
+
+  # Re-create shared data PVC on each run (useful when data changes)
+  recreate_shared_data_pvc: false
+
+  # Node selection (optional)
+  node_selector: {}
+  #   feature.node.kubernetes.io/amd-gpu-mi300x: "true"
+  #   topology.kubernetes.io/zone: us-west-2a
+
+  # Tolerations (optional — allow scheduling on tainted nodes)
+  tolerations: []
+  #   - key: gpu
+  #     operator: Equal
+  #     value: amd
+  #     effect: NoSchedule
+
+  # Storage (optional)
+  # results_pvc: my-results-pvc     # PVC for writing results (mounted at /results)
+  # data_pvc: my-data-pvc           # Existing PVC for input data (auto-created if omitted)
+
+  # Storage classes (override cluster defaults)
+  # nfs_storage_class: nfs-banff
+  # local_path_storage_class: local-path
+  # data_storage_class: nfs-banff
+
+  # Output dir for generated Kubernetes manifests
+  # output_dir: ./k8s_manifests
+
+  # Secrets (image pull and runtime credentials)
+  secrets:
+    strategy: from_local_credentials   # from_local_credentials | from_secret
+    image_pull_secret_names: []
+    # runtime_secret_name: my-runtime-secret
+
+  # Allow privileged containers for profiling tools (e.g. rocprofv3)
+  # allow_privileged_profiling: true
+
+# ------------------------------------
+# Distributed launcher
+# ------------------------------------
+distributed:
+  enabled: true
+  launcher: torchrun    # torchrun | deepspeed | megatron | torchtitan | vllm | sglang | sglang-disagg | primus | native
+  backend: nccl         # nccl | gloo
+  nnodes: 2             # Number of pods (each runs one replica of the job)
+  nproc_per_node: 8     # Must match k8s.gpu_count
+  master_port: 29500
+
+  # SGLang disaggregated only: override default prefill/decode split
+  # sglang_disagg:
+  #   prefill_nodes: 2
+  #   decode_nodes: 2
+
+# ------------------------------------
+# Environment variables
+# ------------------------------------
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
+  NCCL_IB_DISABLE: "1"
+  NCCL_SOCKET_IFNAME: eth0
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+  NCCL_TIMEOUT: "600"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  # MIOpen / RCCL settings
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
+
+  # Point to the PVC mount for data provider models
+  # MAD_DATAHOME: /data
+
+# ------------------------------------
+# Hardware (defaults to AMD/Ubuntu)
+# ------------------------------------
+# gpu_vendor: AMD      # AMD | NVIDIA
+# guest_os: UBUNTU     # UBUNTU | CENTOS
+
+# ------------------------------------
+# Profiling tools (optional)
+# Requires allow_privileged_profiling: true in k8s section above
+# ------------------------------------
+# tools:
+#   - name: rocprofv3_lightweight
+#   - name: rocprofv3_communication
+#     env_vars:
+#       RCCL_DEBUG: INFO
+#   - name: gpu_info_power_profiler
+#     env_vars:
+#       POWER_DEVICE: all
+#       POWER_SAMPLING_RATE: "0.1"
+#   - name: gpu_info_vram_profiler
+#     env_vars:
+#       VRAM_DEVICE: all
+#       VRAM_SAMPLING_RATE: "0.1"
+
+# ------------------------------------
+# Scripts (optional)
+# ------------------------------------
+# pre_scripts: []
+# post_scripts: []
+# encapsulate_script: null
+
+# ------------------------------------
+# Log error scanning (optional)
+# ------------------------------------
+log_error:
+  pattern_scan: true
+  benign_patterns: []
+  patterns: []
+
+# ------------------------------------
+# Output (optional)
+# ------------------------------------
+output: perf.csv
+# summary_output: null
+# data_config: data.json
+
+# ------------------------------------
+# Misc
+# ------------------------------------
+debug: false
+live_output: false
diff --git a/examples/configs/templates/local.yaml b/examples/configs/templates/local.yaml
new file mode 100644
index 00000000..74ce73b4
--- /dev/null
+++ b/examples/configs/templates/local.yaml
@@ -0,0 +1,137 @@
+# ============================================================
+# madengine YAML Config Template — Local Docker
+# ============================================================
+# Copy this file, fill in your model details, and remove or
+# comment out any sections you don't need.
+#
+# Usage:
+#   madengine run --config local.yaml
+#   madengine run --config local.yaml --config docker.gpus=0,1
+#
+# Note: --config is mutually exclusive with --additional-context.
+# ============================================================
+
+# ------------------------------------
+# Model selection (pick one approach)
+# ------------------------------------
+model:
+  # Run by tag — discovers matching models from models.json
+  tags: [dummy]
+
+  # OR point at a pre-built manifest instead of discovering/building
+  # manifest_file: build_manifest.json
+
+  # OR use a specific container image directly (skips build step)
+  # container_image: myrepo/myimage:latest
+
+  # Skip the run step (only build)
+  skip_run: false
+
+  # Per-model timeout in seconds (overrides model's own timeout field)
+  # timeout: 3600
+
+# ------------------------------------
+# Docker options (local runs only)
+# ------------------------------------
+docker:
+  # Extra --build-arg values passed to docker build
+  build_args: {}
+  #   ROCM_VERSION: "6.2"
+  #   BASE_IMAGE: rocm/pytorch:latest
+
+  # Extra -e values passed to docker run
+  env_vars: {}
+  #   MY_VAR: my_value
+
+  # Volume mounts: { /path/in/container: /path/on/host }
+  mounts: {}
+  #   /data: /mnt/shared/datasets
+  #   /results: /home/user/results
+
+  # Override which GPUs to expose (comma-separated indices or "all")
+  # Defaults to all available GPUs on the host
+  # gpus: "0,1"
+
+  # Limit CPU cores (docker --cpus)
+  # cpus: "8"
+
+  # Append arbitrary docker run flags not covered by other options
+  # additional_run_options: "--shm-size=16g --ulimit memlock=-1"
+
+  # Keep container running after the script exits (useful for debugging)
+  keep_alive: false
+
+  # Remove Docker build cache before building
+  clean_cache: false
+
+# ------------------------------------
+# Distributed launcher (optional)
+# ------------------------------------
+# Remove this section entirely for single-GPU, non-distributed runs.
+# distributed:
+#   enabled: true
+#   launcher: torchrun   # torchrun | deepspeed | megatron | torchtitan | vllm | sglang | sglang-disagg | primus | native
+#   backend: nccl        # nccl | gloo
+#   nnodes: 1
+#   nproc_per_node: 4    # GPUs per node
+#   master_port: 29500
+
+# ------------------------------------
+# Environment variables
+# ------------------------------------
+# Injected into the container at runtime (separate from docker.env_vars,
+# which are passed at docker run rather than derived from context).
+env_vars: {}
+#   OMP_NUM_THREADS: "8"
+#   NCCL_DEBUG: WARN
+
+# ------------------------------------
+# Hardware (defaults to AMD/Ubuntu)
+# ------------------------------------
+# gpu_vendor: AMD      # AMD | NVIDIA
+# guest_os: UBUNTU     # UBUNTU | CENTOS
+
+# ------------------------------------
+# Profiling tools (optional)
+# ------------------------------------
+# tools:
+#   - name: rocprofv3_lightweight
+#   - name: gpu_info_power_profiler
+#     env_vars:
+#       POWER_DEVICE: all
+#       POWER_SAMPLING_RATE: "0.1"
+#   - name: gpu_info_vram_profiler
+#     env_vars:
+#       VRAM_DEVICE: all
+#       VRAM_SAMPLING_RATE: "0.1"
+#   - name: rocm_trace_lite
+#   - name: miopen_trace
+#   - name: rocblas_trace
+
+# ------------------------------------
+# Scripts (optional)
+# ------------------------------------
+# pre_scripts: []   # Run inside the container before the main script
+# post_scripts: []  # Run inside the container after the main script
+# encapsulate_script: null  # Wrap the main script (e.g. a profiler launcher)
+
+# ------------------------------------
+# Log error scanning (optional)
+# ------------------------------------
+log_error:
+  pattern_scan: true   # Scan container output for known error patterns
+  benign_patterns: []  # Regex patterns to ignore (false-positive suppression)
+  patterns: []         # Additional error patterns to flag as failures
+
+# ------------------------------------
+# Output (optional)
+# ------------------------------------
+output: perf.csv           # Where to write the results CSV
+# summary_output: null     # Optional JSON summary file
+# data_config: data.json   # Data provider config file
+
+# ------------------------------------
+# Misc
+# ------------------------------------
+debug: false         # Enable verbose debug logging
+live_output: false   # Stream container stdout/stderr in real time
diff --git a/examples/configs/templates/slurm.yaml b/examples/configs/templates/slurm.yaml
new file mode 100644
index 00000000..a4c396f8
--- /dev/null
+++ b/examples/configs/templates/slurm.yaml
@@ -0,0 +1,165 @@
+# ============================================================
+# madengine YAML Config Template — SLURM
+# ============================================================
+# Copy this file, fill in your model and cluster details, and
+# remove or comment out any sections you don't need.
+#
+# Usage:
+#   madengine run --config slurm.yaml
+#   madengine run --config slurm.yaml --config distributed.nnodes=4
+#
+# Target is inferred automatically from the presence of the
+# 'slurm' key — no explicit deploy field is needed.
+# ============================================================
+
+# ------------------------------------
+# Model selection
+# ------------------------------------
+model:
+  # Run by tag — discovers matching models from models.json
+  tags: [dummy_torchrun]
+
+  # OR point at a pre-built manifest
+  # manifest_file: build_manifest.json
+
+  # OR use a specific container image directly
+  # container_image: myrepo/myimage:latest
+
+  skip_run: false
+  # timeout: 86400
+
+# ------------------------------------
+# SLURM job settings
+# ------------------------------------
+slurm:
+  partition: amd-rccl       # SLURM partition name (required)
+  nodes: 2                  # Number of nodes to allocate
+  gpus_per_node: 8          # GPUs per node
+  time: "24:00:00"          # Wall-clock time limit (HH:MM:SS)
+  output_dir: ./slurm_results  # Where SLURM stdout/stderr logs are written
+  exclusive: true           # Request exclusive node access (no sharing)
+
+  # Optional: restrict to a specific set of nodes (disables node health check)
+  # nodelist: node01,node02
+
+  # Optional: exclude specific nodes
+  # exclude: node03,node04
+
+  # Optional: hardware constraint (e.g. GPU architecture label)
+  # constraint: MI300X
+
+  # Optional: SLURM account and QoS
+  # account: my-project
+  # qos: high
+
+  # Optional: memory limit per node (e.g. "256G")
+  # mem: 256G
+
+  # Optional: environment modules to load before the job starts
+  # modules:
+  #   - rocm/6.2.0
+  #   - gcc/11.2.0
+
+  # Optional: shared filesystem paths (accessible from all nodes)
+  # shared_workspace: /lustre/shared/workspace
+  # results_dir: /lustre/shared/results
+
+  # Optional: network interface for NCCL/RCCL (override auto-detection)
+  # network_interface: ib0
+
+  # Optional: node health preflight (GPU memory check before job starts)
+  enable_node_check: true
+  auto_cleanup_nodes: false
+  verbose_node_check: false
+
+# ------------------------------------
+# Distributed launcher
+# ------------------------------------
+distributed:
+  launcher: torchrun    # torchrun | deepspeed | megatron | torchtitan | vllm | sglang | sglang-disagg | primus | native
+  backend: nccl         # nccl | gloo
+  nnodes: 2             # Must match slurm.nodes
+  nproc_per_node: 8     # Must match slurm.gpus_per_node
+  port: 29500           # Master port for distributed rendezvous
+
+  # SGLang disaggregated only: override default prefill/decode split
+  # sglang_disagg:
+  #   prefill_nodes: 2
+  #   decode_nodes: 2
+
+# ------------------------------------
+# Environment variables
+# ------------------------------------
+env_vars:
+  OMP_NUM_THREADS: "8"
+  NCCL_DEBUG: WARN
+  NCCL_DEBUG_SUBSYS: "INIT,NET"
+  NCCL_TIMEOUT: "600"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
+  RCCL_ENABLE_HIPGRAPH: "0"
+
+  # InfiniBand (uncomment when using IB interconnect)
+  # NCCL_IB_DISABLE: "0"
+  # NCCL_IB_HCA: "mlx5_0:1,mlx5_1:1"
+  # NCCL_SOCKET_IFNAME: ib0
+
+  # Ethernet (uncomment when using Ethernet interconnect)
+  # NCCL_IB_DISABLE: "1"
+  # NCCL_SOCKET_IFNAME: eth0
+
+# ------------------------------------
+# Hardware (defaults to AMD/Ubuntu)
+# ------------------------------------
+# gpu_vendor: AMD      # AMD | NVIDIA
+# guest_os: UBUNTU     # UBUNTU | CENTOS
+
+# ------------------------------------
+# Profiling tools (optional)
+# ------------------------------------
+# tools:
+#   - name: rocprofv3_lightweight
+#   - name: rocprofv3_communication
+#     env_vars:
+#       RCCL_DEBUG: INFO
+#   - name: gpu_info_power_profiler
+#     env_vars:
+#       POWER_DEVICE: all
+#       POWER_SAMPLING_RATE: "0.1"
+#   - name: gpu_info_vram_profiler
+#     env_vars:
+#       VRAM_DEVICE: all
+#       VRAM_SAMPLING_RATE: "0.1"
+
+# ------------------------------------
+# Scripts (optional)
+# ------------------------------------
+# pre_scripts: []
+# post_scripts: []
+# encapsulate_script: null
+
+# ------------------------------------
+# Log error scanning (optional)
+# ------------------------------------
+log_error:
+  pattern_scan: true
+  benign_patterns: []
+  patterns: []
+
+# ------------------------------------
+# Output (optional)
+# ------------------------------------
+output: perf.csv
+# summary_output: null
+# data_config: data.json
+
+# ------------------------------------
+# Misc
+# ------------------------------------
+debug: false
+live_output: false
diff --git a/examples/k8s-configs/basic/01-native-single-node-single-gpu-tools.json b/examples/k8s-configs/basic/01-native-single-node-single-gpu-tools.json
index 8acb9127..a5fa27e4 100644
--- a/examples/k8s-configs/basic/01-native-single-node-single-gpu-tools.json
+++ b/examples/k8s-configs/basic/01-native-single-node-single-gpu-tools.json
@@ -2,32 +2,31 @@
   "_comment": "Single Node, Single GPU with Tools",
   "_description": "Single GPU configuration with GPU profiling tools",
   "_use_case": "Single GPU benchmarks with monitoring, no distributed execution",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "tools": [{
     "name": "gpu_info_vram_profiler"
   }],
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 1,
-    
+
     "memory": "16Gi",
     "memory_limit": "32Gi",
     "cpu": "8",
     "cpu_limit": "16",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/k8s-configs/basic/01-native-single-node-single-gpu.json b/examples/k8s-configs/basic/01-native-single-node-single-gpu.json
index 373c8eea..9d7174df 100644
--- a/examples/k8s-configs/basic/01-native-single-node-single-gpu.json
+++ b/examples/k8s-configs/basic/01-native-single-node-single-gpu.json
@@ -2,27 +2,27 @@
   "_comment": "Single Node, Single GPU - Basic Configuration",
   "_description": "Configuration for running a model on a single GPU in a Kubernetes cluster",
   "_use_case": "Testing, small models, quick benchmarks (single GPU, no distributed execution)",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 1,
-    
+
     "memory": "16Gi",
     "memory_limit": "32Gi",
     "cpu": "8",
     "cpu_limit": "16",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu-tools.json b/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu-tools.json
index 3c5f80ae..695c6804 100644
--- a/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu-tools.json
+++ b/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu-tools.json
@@ -2,26 +2,26 @@
   "_comment": "Single Node, Multiple GPUs (2 GPUs) with Tools",
   "_description": "2 GPU configuration with torchrun and GPU profiling tools",
   "_use_case": "Multi-GPU training with performance monitoring on busy clusters",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "tools": [{"name": "gpu_info_power_profiler"}, {"name": "gpu_info_vram_profiler"}, {"name": "rocprof"}, {"name": "rpd"}, {"name": "miopen_trace"}, {"name": "rocblas_trace"}, {"name": "tensile_trace"}],
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 2,
-    
+
     "memory": "64Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -30,7 +30,7 @@
     "nproc_per_node": 2,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_IB_DISABLE": "1",
@@ -44,7 +44,7 @@
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "_env_var_notes": {
     "NCCL_DEBUG": "Changed from INFO to WARN to reduce log verbosity",
     "MIOPEN_FIND_MODE": "1 = Use compiled kernels, avoid find-db warnings",
@@ -52,6 +52,6 @@
     "HSA_FORCE_FINE_GRAIN_PCIE": "Helps with IOMMU-related warnings in containers",
     "RCCL_ENABLE_HIPGRAPH": "Disable for compatibility (experimental feature)"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json b/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json
index be0d7c5e..d0a7ebf5 100644
--- a/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json
+++ b/examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json
@@ -2,25 +2,25 @@
   "_comment": "Single Node, Multiple GPUs (2 GPUs) - Multi-GPU Testing",
   "_description": "Configuration for running a model on 2 GPUs on a single node with torchrun",
   "_use_case": "Multi-GPU training and testing on busy clusters",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 2,
-    
+
     "memory": "64Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3,
     "recreate_shared_data_pvc": true
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -29,7 +29,7 @@
     "nproc_per_node": 2,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_IB_DISABLE": "1",
@@ -43,7 +43,7 @@
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "_env_var_notes": {
     "NCCL_DEBUG": "Changed from INFO to WARN to reduce log verbosity",
     "MIOPEN_FIND_MODE": "1 = Use compiled kernels, avoid find-db warnings",
@@ -52,6 +52,6 @@
     "RCCL_ENABLE_HIPGRAPH": "Disable for compatibility (experimental feature)",
     "NCCL_MIN_NCHANNELS": "Removed (warning says ignored for <8 GPUs)"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/03-torchrun-multi-node-basic.json b/examples/k8s-configs/basic/03-torchrun-multi-node-basic.json
index 0c2205f9..bf3a34af 100644
--- a/examples/k8s-configs/basic/03-torchrun-multi-node-basic.json
+++ b/examples/k8s-configs/basic/03-torchrun-multi-node-basic.json
@@ -2,26 +2,26 @@
   "_comment": "Multi-Node (2 nodes, 2 GPUs each) - Basic Configuration",
   "_description": "Configuration for distributed workload across 2 nodes with 2 GPUs per node (4 GPUs total)",
   "_use_case": "Multi-node distributed execution testing on busy clusters",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 2,
-    
+
     "memory": "64Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3,
     "host_ipc": true,
     "recreate_shared_data_pvc": true
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -30,7 +30,7 @@
     "nproc_per_node": 2,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_DEBUG_SUBSYS": "INIT,NET",
@@ -48,7 +48,7 @@
     "HSA_NO_SCRATCH_RECLAIM": "1",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "_env_var_notes": {
     "NCCL_DEBUG": "Changed to WARN for cleaner logs (use INFO for debugging)",
     "MIOPEN_FIND_MODE": "1 = Use compiled kernels, avoid find-db warnings",
@@ -57,6 +57,6 @@
     "RCCL_ENABLE_HIPGRAPH": "Disable for compatibility",
     "NCCL_MIN_NCHANNELS": "Removed (warning says ignored for <8 GPUs)"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/04-torchrun-multi-node-advanced.json b/examples/k8s-configs/basic/04-torchrun-multi-node-advanced.json
index 5560ffab..577ed424 100644
--- a/examples/k8s-configs/basic/04-torchrun-multi-node-advanced.json
+++ b/examples/k8s-configs/basic/04-torchrun-multi-node-advanced.json
@@ -2,31 +2,31 @@
   "_comment": "Multi-Node (4 nodes, 2 GPUs each) - Advanced Configuration",
   "_description": "Full-featured configuration for large-scale distributed workloads with PVCs, tolerations, and node affinity",
   "_use_case": "Multi-node distributed execution with advanced features on busy clusters (8 GPUs total)",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "ml-training",
     "gpu_count": 2,
     "gpu_resource_name": "amd.com/gpu",
-    
+
     "memory": "128Gi",
     "memory_limit": "192Gi",
     "cpu": "24",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "IfNotPresent",
     "backoff_limit": 5,
     "host_ipc": true,
-    
+
     "node_selector": {
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true",
       "topology.kubernetes.io/zone": "us-west-2a",
       "workload-type": "ml-training"
     },
-    
+
     "tolerations": [
       {
         "key": "gpu",
@@ -41,13 +41,13 @@
         "effect": "NoSchedule"
       }
     ],
-    
+
     "results_pvc": "ml-results-pvc",
     "data_pvc": "ml-datasets-pvc",
-    
+
     "output_dir": "./k8s_manifests/multi-node"
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -56,7 +56,7 @@
     "nproc_per_node": 2,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_DEBUG_SUBSYS": "INIT,NET,GRAPH",
@@ -73,7 +73,7 @@
     "MIOPEN_USER_DB_PATH": "/tmp/.miopen",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "_env_var_notes": {
     "NCCL_DEBUG": "Changed to WARN for cleaner logs (use INFO for debugging)",
     "MIOPEN_FIND_MODE": "1 = Use compiled kernels, avoid find-db warnings",
@@ -82,6 +82,6 @@
     "RCCL_ENABLE_HIPGRAPH": "Disable for compatibility",
     "NCCL_MIN_NCHANNELS": "Removed (warning says ignored for <8 GPUs)"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/05-torchrun-nvidia-gpu-example.json b/examples/k8s-configs/basic/05-torchrun-nvidia-gpu-example.json
index 7c087acc..14665bdb 100644
--- a/examples/k8s-configs/basic/05-torchrun-nvidia-gpu-example.json
+++ b/examples/k8s-configs/basic/05-torchrun-nvidia-gpu-example.json
@@ -2,29 +2,29 @@
   "_comment": "NVIDIA GPU - Single Node, 4 GPUs",
   "_description": "Configuration for running models on NVIDIA GPUs (A100, H100, etc.) with distributed execution",
   "_use_case": "NVIDIA-based Kubernetes clusters, multi-GPU training",
-  
+
   "gpu_vendor": "NVIDIA",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 4,
     "gpu_resource_name": "nvidia.com/gpu",
-    
+
     "memory": "128Gi",
     "memory_limit": "256Gi",
     "cpu": "48",
     "cpu_limit": "96",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3,
-    
+
     "node_selector": {
       "accelerator": "nvidia-tesla-a100"
     }
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -33,7 +33,7 @@
     "nproc_per_node": 4,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "INFO",
     "NCCL_IB_DISABLE": "1",
@@ -42,6 +42,6 @@
     "NCCL_P2P_LEVEL": "NVL",
     "OMP_NUM_THREADS": "12"
   },
-  
+
   "debug": false
 }
diff --git a/examples/k8s-configs/basic/06-data-provider-with-pvc.json b/examples/k8s-configs/basic/06-data-provider-with-pvc.json
index 9bd2e47f..a00b3eb9 100644
--- a/examples/k8s-configs/basic/06-data-provider-with-pvc.json
+++ b/examples/k8s-configs/basic/06-data-provider-with-pvc.json
@@ -3,50 +3,50 @@
   "_description": "Production-ready setup for training with external data (MinIO, S3, NAS, etc.)",
   "_use_case": "Models that require data provider (e.g., dummy_torchrun_data_minio)",
   "_auto_pvc": "✅ PVC is automatically created - NO manual kubectl commands needed!",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
     "gpu_count": 2,
-    
+
     "_comment_pvc": "OPTIONAL - Leave empty for auto-creation (recommended)",
     "_pvc_auto": "Auto-created: madengine-shared-data (100Gi, RWO/RWX based on nnodes)",
     "_pvc_custom": "To use existing PVC: uncomment and set: \"data_pvc\": \"your-pvc-name\"",
-    
+
     "memory": "64Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "Always",
     "backoff_limit": 3,
     "host_ipc": true
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
     "launcher": "torchrun",
-    
+
     "_comment_single_node": "For single-node: nnodes=1, nproc_per_node=N_GPUs",
     "_comment_multi_node": "For multi-node: nnodes=N, nproc_per_node=GPUs_per_node",
     "nnodes": 1,
     "nproc_per_node": 2,
-    
+
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "_comment_mad_datahome": "MAD_DATAHOME points to PVC mount point (default: /data)",
     "MAD_DATAHOME": "/data",
-    
+
     "_comment_nccl": "NCCL/RCCL configuration for AMD GPUs",
     "NCCL_DEBUG": "WARN",
     "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1",
-    
+
     "_comment_rocm": "ROCm optimizations",
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
     "HSA_ENABLE_SDMA": "0",
@@ -54,13 +54,13 @@
     "MIOPEN_USER_DB_PATH": "/tmp/.miopen",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "_quick_start": {
     "step_1": "Build: madengine build --tags dummy_torchrun_data_minio --additional-context-file THIS_FILE",
     "step_2": "Run: madengine run --manifest-file build_manifest.json",
     "result": "✅ PVC auto-created, data downloaded, training started - all automatic!"
   },
-  
+
   "_how_it_works": {
     "auto_pvc": "madengine creates 'madengine-shared-data' PVC automatically if not found",
     "reusable": "PVC persists across runs - data downloads once, reuses forever",
@@ -68,13 +68,12 @@
     "verify": "kubectl get pvc madengine-shared-data",
     "inspect": "kubectl describe pvc madengine-shared-data"
   },
-  
+
   "_advanced": {
     "custom_pvc": "To use existing PVC: Add \"data_pvc\": \"your-pvc-name\" to k8s config above",
     "storage_class": "Auto-PVC uses cluster's default storage class",
     "pvc_size": "Default 100Gi - modify code in kubernetes.py if needed"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/k8s-configs/basic/megatron-lm-multi-node-basic.json b/examples/k8s-configs/basic/megatron-lm-multi-node-basic.json
index e059ba08..30a4377c 100644
--- a/examples/k8s-configs/basic/megatron-lm-multi-node-basic.json
+++ b/examples/k8s-configs/basic/megatron-lm-multi-node-basic.json
@@ -3,10 +3,10 @@
   "_description": "Large-scale transformer training with Megatron-LM on Kubernetes",
   "_use_case": "Multi-node Megatron-LM training with tensor and pipeline parallelism",
   "_reference": "https://github.com/NVIDIA/Megatron-LM",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 8,
     "namespace": "ml-training",
@@ -16,19 +16,18 @@
     "cpu_limit": "64",
     "image_pull_policy": "IfNotPresent"
   },
-  
+
   "distributed": {
     "launcher": "megatron",
     "nnodes": 4,
     "nproc_per_node": 8,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "16",
     "NCCL_DEBUG": "INFO"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/k8s-configs/basic/sglang-disagg-custom-split.json b/examples/k8s-configs/basic/sglang-disagg-custom-split.json
index 49aeecb1..41a26b55 100644
--- a/examples/k8s-configs/basic/sglang-disagg-custom-split.json
+++ b/examples/k8s-configs/basic/sglang-disagg-custom-split.json
@@ -10,10 +10,10 @@
     "total": "7 pods total",
     "note": "Custom split overrides default 40/60 ratio"
   },
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 8,
     "memory": "256Gi",
@@ -24,7 +24,7 @@
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 7,
@@ -35,7 +35,7 @@
       "decode_nodes": 2
     }
   },
-  
+
   "context": {
     "env_vars": {
       "SGLANG_ENABLE_RADIX_CACHE": "1",
@@ -45,4 +45,3 @@
     }
   }
 }
-
diff --git a/examples/k8s-configs/basic/sglang-disagg-multi-node-basic.json b/examples/k8s-configs/basic/sglang-disagg-multi-node-basic.json
index c16fd342..b632d49b 100644
--- a/examples/k8s-configs/basic/sglang-disagg-multi-node-basic.json
+++ b/examples/k8s-configs/basic/sglang-disagg-multi-node-basic.json
@@ -9,10 +9,10 @@
     "decode": "Pods 3-4 (2 nodes, ~60%)",
     "total": "5 pods total"
   },
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 8,
     "memory": "256Gi",
@@ -23,14 +23,14 @@
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 5,
     "nproc_per_node": 8,
     "master_port": 29500
   },
-  
+
   "context": {
     "env_vars": {
       "SGLANG_ENABLE_RADIX_CACHE": "1",
@@ -41,4 +41,3 @@
     }
   }
 }
-
diff --git a/examples/k8s-configs/basic/sglang-multi-node-basic.json b/examples/k8s-configs/basic/sglang-multi-node-basic.json
index b693260e..4c4fe1ce 100644
--- a/examples/k8s-configs/basic/sglang-multi-node-basic.json
+++ b/examples/k8s-configs/basic/sglang-multi-node-basic.json
@@ -3,10 +3,10 @@
   "_description": "Multi-node SGLang with native launcher and Ray",
   "_use_case": "Distributed LLM inference serving",
   "_reference": "https://github.com/sgl-project/sglang",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 4,
     "memory": "256Gi",
@@ -17,14 +17,14 @@
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 2,
     "nproc_per_node": 4,
     "master_port": 29500
   },
-  
+
   "context": {
     "env_vars": {
       "SGLANG_KV_CACHE_SIZE": "0.5",
@@ -33,4 +33,3 @@
     }
   }
 }
-
diff --git a/examples/k8s-configs/basic/torchtitan-multi-node-basic.json b/examples/k8s-configs/basic/torchtitan-multi-node-basic.json
index e350605d..0cd41bfe 100644
--- a/examples/k8s-configs/basic/torchtitan-multi-node-basic.json
+++ b/examples/k8s-configs/basic/torchtitan-multi-node-basic.json
@@ -3,10 +3,10 @@
   "_description": "Uses multi-dimensional parallelism (TP + PP + FSDP2)",
   "_use_case": "Large-scale LLM pre-training (70B+ models)",
   "_reference": "https://github.com/pytorch/torchtitan",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 8,
     "memory": "512Gi",
@@ -17,14 +17,14 @@
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "torchtitan",
     "nnodes": 4,
     "nproc_per_node": 8,
     "master_port": 29500
   },
-  
+
   "context": {
     "pre_scripts": [
       "scripts/common/setup_pytorch_env.sh"
@@ -36,4 +36,3 @@
     }
   }
 }
-
diff --git a/examples/k8s-configs/basic/vllm-multi-node-basic.json b/examples/k8s-configs/basic/vllm-multi-node-basic.json
index 4c1b61c9..67e191d1 100644
--- a/examples/k8s-configs/basic/vllm-multi-node-basic.json
+++ b/examples/k8s-configs/basic/vllm-multi-node-basic.json
@@ -3,10 +3,10 @@
   "_description": "Each pod runs independent vLLM replica for higher throughput",
   "_use_case": "High-throughput LLM inference serving",
   "_reference": "https://github.com/vllm-project/vllm",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 4,
     "memory": "256Gi",
@@ -17,14 +17,14 @@
       "feature.node.kubernetes.io/amd-gpu-mi300x": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 2,
     "nproc_per_node": 4,
     "master_port": 29500
   },
-  
+
   "context": {
     "env_vars": {
       "VLLM_KV_CACHE_SIZE": "0.5",
@@ -35,4 +35,3 @@
     }
   }
 }
-
diff --git a/examples/k8s-configs/minimal/custom-namespace-minimal.json b/examples/k8s-configs/minimal/custom-namespace-minimal.json
index fa3747dd..bac0ef06 100644
--- a/examples/k8s-configs/minimal/custom-namespace-minimal.json
+++ b/examples/k8s-configs/minimal/custom-namespace-minimal.json
@@ -2,14 +2,13 @@
   "_comment": "Minimal Config with Custom Namespace",
   "_description": "Shows how to override specific defaults",
   "_use_case": "Deploying to a specific namespace",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 1,
     "namespace": "default",
     "memory": "32Gi"
   }
 }
-
diff --git a/examples/k8s-configs/minimal/deepspeed-minimal.json b/examples/k8s-configs/minimal/deepspeed-minimal.json
index 7bece847..7fdcad8c 100644
--- a/examples/k8s-configs/minimal/deepspeed-minimal.json
+++ b/examples/k8s-configs/minimal/deepspeed-minimal.json
@@ -2,23 +2,23 @@
   "_comment": "DeepSpeed Minimal Config - Uses bash script with torchrun",
   "_description": "DeepSpeed with ZeRO-1 optimization",
   "_use_case": "Test DeepSpeed distributed training with bash wrapper",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2,
     "namespace": "default",
     "memory": "32Gi",
     "cpu": "16"
   },
-  
+
   "distributed": {
     "launcher": "deepspeed",
     "nnodes": 1,
     "nproc_per_node": 2
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   }
diff --git a/examples/k8s-configs/minimal/megatron-lm-exclude-node.json b/examples/k8s-configs/minimal/megatron-lm-exclude-node.json
index 793431a2..6571b445 100644
--- a/examples/k8s-configs/minimal/megatron-lm-exclude-node.json
+++ b/examples/k8s-configs/minimal/megatron-lm-exclude-node.json
@@ -3,40 +3,39 @@
   "_description": "Use this if you need to explicitly exclude a node with disk pressure or other issues",
   "_use_case": "Temporary config to avoid problematic nodes during maintenance",
   "_note": "This uses anti-affinity to exclude banff-pla-r25-05. Update the hostname as needed.",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2,
     "namespace": "default",
-    
+
     "memory": "32Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "IfNotPresent",
-    
+
     "node_selector": {
       "feature.node.kubernetes.io/amd-gpu": "true"
     }
   },
-  
+
   "distributed": {
     "launcher": "megatron",
     "nnodes": 1,
     "nproc_per_node": 2
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   },
-  
+
   "_instructions": [
     "To exclude a specific node, add node affinity in the deployment code,",
     "or temporarily drain the node: kubectl drain banff-pla-r25-05 --ignore-daemonsets",
     "This config ensures scheduling only on nodes with AMD GPUs"
   ]
 }
-
diff --git a/examples/k8s-configs/minimal/megatron-lm-minimal.json b/examples/k8s-configs/minimal/megatron-lm-minimal.json
index 43266e01..86f3db49 100644
--- a/examples/k8s-configs/minimal/megatron-lm-minimal.json
+++ b/examples/k8s-configs/minimal/megatron-lm-minimal.json
@@ -2,23 +2,23 @@
   "_comment": "Megatron-LM Minimal Config - Dedicated launcher support",
   "_description": "Megatron-LM with automated tensor/pipeline parallelism setup",
   "_use_case": "Large-scale transformer training with Megatron-LM on Kubernetes",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2,
     "namespace": "default",
     "memory": "32Gi",
     "cpu": "16"
   },
-  
+
   "distributed": {
     "launcher": "megatron",
     "nnodes": 1,
     "nproc_per_node": 2
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   }
diff --git a/examples/k8s-configs/minimal/megatron-lm-optimized.json b/examples/k8s-configs/minimal/megatron-lm-optimized.json
index 29559308..d25eadec 100644
--- a/examples/k8s-configs/minimal/megatron-lm-optimized.json
+++ b/examples/k8s-configs/minimal/megatron-lm-optimized.json
@@ -2,30 +2,30 @@
   "_comment": "Optimized Megatron-LM Configuration with Node Selector",
   "_description": "Production-ready configuration with resource management and node selection",
   "_use_case": "Megatron-LM training with automatic node selection to avoid problematic nodes",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2,
     "namespace": "default",
-    
+
     "memory": "32Gi",
     "memory_limit": "128Gi",
     "cpu": "16",
     "cpu_limit": "32",
-    
+
     "image_pull_policy": "IfNotPresent",
     "backoff_limit": 3,
-    
+
     "node_selector": {
       "feature.node.kubernetes.io/amd-gpu": "true",
       "amd.com/gpu.product-name": "AMD_Instinct_MI300X_OAM"
     },
-    
+
     "tolerations": []
   },
-  
+
   "distributed": {
     "enabled": true,
     "backend": "nccl",
@@ -34,7 +34,7 @@
     "nproc_per_node": 2,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8",
     "NCCL_DEBUG": "WARN",
@@ -47,7 +47,6 @@
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/k8s-configs/minimal/sglang-disagg-minimal.json b/examples/k8s-configs/minimal/sglang-disagg-minimal.json
index f0f6ad05..c1683f1c 100644
--- a/examples/k8s-configs/minimal/sglang-disagg-minimal.json
+++ b/examples/k8s-configs/minimal/sglang-disagg-minimal.json
@@ -2,18 +2,17 @@
   "_comment": "Minimal SGLang Disaggregated configuration - 3 nodes minimum",
   "_description": "SGLang disaggregated inference with 3 pods (1 proxy + 1 prefill + 1 decode)",
   "_architecture": "Pod 0: Proxy, Pod 1: Prefill, Pod 2: Decode",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-    
+
   "k8s": {
     "gpu_count": 3
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 3,
     "nproc_per_node": 1
   }
 }
-
diff --git a/examples/k8s-configs/minimal/sglang-single-node-minimal.json b/examples/k8s-configs/minimal/sglang-single-node-minimal.json
index 5a12b19d..b3f0e297 100644
--- a/examples/k8s-configs/minimal/sglang-single-node-minimal.json
+++ b/examples/k8s-configs/minimal/sglang-single-node-minimal.json
@@ -3,26 +3,25 @@
   "_description": "SGLang inference with Tensor Parallelism for single-node",
   "_use_case": "LLM inference serving with SGLang",
   "_reference": "https://github.com/sgl-project/sglang",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 4,
     "memory": "128Gi",
     "cpu": "32"
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "context": {
     "env_vars": {
       "SGLANG_KV_CACHE_SIZE": "0.7"
     }
   }
 }
-
diff --git a/examples/k8s-configs/minimal/torchrun-multi-gpu-minimal.json b/examples/k8s-configs/minimal/torchrun-multi-gpu-minimal.json
index 49a2ebbf..f5a34635 100644
--- a/examples/k8s-configs/minimal/torchrun-multi-gpu-minimal.json
+++ b/examples/k8s-configs/minimal/torchrun-multi-gpu-minimal.json
@@ -2,18 +2,17 @@
   "_comment": "Minimal Multi-GPU Config - 2 GPUs with torchrun",
   "_description": "Uses built-in defaults for AMD multi-GPU optimizations",
   "_use_case": "Quick multi-GPU training with minimal configuration",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nnodes": 1,
     "nproc_per_node": 2
   }
 }
-
diff --git a/examples/k8s-configs/minimal/torchrun-multi-node-minimal.json b/examples/k8s-configs/minimal/torchrun-multi-node-minimal.json
index 656ac123..bf194838 100644
--- a/examples/k8s-configs/minimal/torchrun-multi-node-minimal.json
+++ b/examples/k8s-configs/minimal/torchrun-multi-node-minimal.json
@@ -2,18 +2,17 @@
   "_comment": "Minimal Multi-Node Config - 2 nodes x 2 GPUs each",
   "_description": "Uses built-in defaults for multi-node distributed workload",
   "_use_case": "Quick multi-node testing with 4 GPUs total",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 2
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nnodes": 2,
     "nproc_per_node": 2
   }
 }
-
diff --git a/examples/k8s-configs/minimal/torchrun-nvidia-gpu-minimal.json b/examples/k8s-configs/minimal/torchrun-nvidia-gpu-minimal.json
index 444e037f..2a6e39c5 100644
--- a/examples/k8s-configs/minimal/torchrun-nvidia-gpu-minimal.json
+++ b/examples/k8s-configs/minimal/torchrun-nvidia-gpu-minimal.json
@@ -2,18 +2,17 @@
   "_comment": "Minimal NVIDIA GPU Config - 4 GPUs with torchrun",
   "_description": "Uses built-in NVIDIA optimizations and presets",
   "_use_case": "Quick NVIDIA GPU testing with minimal configuration",
-  
+
   "gpu_vendor": "NVIDIA",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 4
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nnodes": 1,
     "nproc_per_node": 4
   }
 }
-
diff --git a/examples/k8s-configs/minimal/torchrun-single-gpu-minimal.json b/examples/k8s-configs/minimal/torchrun-single-gpu-minimal.json
index 5041003e..3ed65bdb 100644
--- a/examples/k8s-configs/minimal/torchrun-single-gpu-minimal.json
+++ b/examples/k8s-configs/minimal/torchrun-single-gpu-minimal.json
@@ -2,18 +2,17 @@
   "_comment": "Minimal Single GPU Config - Only Essential Fields",
   "_description": "Uses built-in defaults for everything except GPU count",
   "_use_case": "Quick single GPU testing with minimal configuration",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 1
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nnodes": 1,
     "nproc_per_node": 1
   }
 }
-
diff --git a/examples/k8s-configs/minimal/torchtitan-single-node-minimal.json b/examples/k8s-configs/minimal/torchtitan-single-node-minimal.json
index 9605f09c..577ea998 100644
--- a/examples/k8s-configs/minimal/torchtitan-single-node-minimal.json
+++ b/examples/k8s-configs/minimal/torchtitan-single-node-minimal.json
@@ -3,20 +3,19 @@
   "_description": "Uses torchtitan with Tensor Parallelism for single-node training",
   "_use_case": "Quick LLM pre-training with torchtitan (8B model)",
   "_reference": "https://github.com/pytorch/torchtitan",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 8,
     "memory": "256Gi",
     "cpu": "64"
   },
-  
+
   "distributed": {
     "launcher": "torchtitan",
     "nnodes": 1,
     "nproc_per_node": 8
   }
 }
-
diff --git a/examples/k8s-configs/minimal/vllm-single-node-minimal.json b/examples/k8s-configs/minimal/vllm-single-node-minimal.json
index ed0de4ac..102db25e 100644
--- a/examples/k8s-configs/minimal/vllm-single-node-minimal.json
+++ b/examples/k8s-configs/minimal/vllm-single-node-minimal.json
@@ -3,22 +3,22 @@
   "_description": "vLLM inference with Tensor Parallelism for single-node",
   "_use_case": "LLM inference serving with vLLM",
   "_reference": "https://github.com/vllm-project/vllm",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "k8s": {
     "gpu_count": 4,
     "memory": "128Gi",
     "cpu": "32"
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "context": {
     "env_vars": {
       "VLLM_KV_CACHE_SIZE": "0.7",
@@ -26,4 +26,3 @@
     }
   }
 }
-
diff --git a/examples/profiling-configs/rocprofv3_multi_gpu.json b/examples/profiling-configs/rocprofv3_multi_gpu.json
index 2b2b250d..a830e78b 100644
--- a/examples/profiling-configs/rocprofv3_multi_gpu.json
+++ b/examples/profiling-configs/rocprofv3_multi_gpu.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for distributed training on 4 GPUs with ROCm profiling tools on SLURM",
   "_use_case": "Multi-GPU training with communication profiling, power monitoring, and VRAM tracking",
   "_note": "Using 'amd-rccl' partition. Change to your cluster's partition name if different (e.g., 'gpu', 'compute').",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -15,12 +15,12 @@
     "output_dir": "./slurm_results",
     "exclusive": false
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nproc_per_node": 4
   },
-  
+
   "tools": [
     {
       "name": "rocprofv3_communication",
@@ -43,10 +43,10 @@
       }
     }
   ],
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   },
-  
+
   "debug": false
 }
diff --git a/examples/profiling-configs/rocprofv3_multi_node.json b/examples/profiling-configs/rocprofv3_multi_node.json
index ad87c814..e648bef2 100644
--- a/examples/profiling-configs/rocprofv3_multi_node.json
+++ b/examples/profiling-configs/rocprofv3_multi_node.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for distributed training across multiple nodes with ROCm profiling tools on SLURM",
   "_use_case": "Large-scale multi-node training with communication profiling, power monitoring, and VRAM tracking",
   "_note": "Using 'amd-rccl' partition. Change to your cluster's partition name if different (e.g., 'gpu', 'compute').",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -15,13 +15,13 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "nnodes": 2,
     "nproc_per_node": 4
   },
-  
+
   "tools": [
     {
       "name": "rocprofv3_communication",
@@ -45,12 +45,12 @@
       }
     }
   ],
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8",
     "NCCL_IB_DISABLE": "0",
     "NCCL_SOCKET_IFNAME": "eth0"
   },
-  
+
   "debug": false
 }
diff --git a/examples/slurm-configs/basic/01-single-node-single-gpu.json b/examples/slurm-configs/basic/01-single-node-single-gpu.json
index c0877717..5e2f073b 100644
--- a/examples/slurm-configs/basic/01-single-node-single-gpu.json
+++ b/examples/slurm-configs/basic/01-single-node-single-gpu.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for running a model on a single GPU on a SLURM cluster",
   "_use_case": "Testing, small models, quick benchmarks (single GPU, no distributed execution)",
   "_note": "Using 'amd-rccl' partition. Change to your cluster's partition name if different (e.g., 'gpu', 'compute').",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -15,11 +15,10 @@
     "output_dir": "./slurm_results",
     "exclusive": false
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/02-single-node-multi-gpu.json b/examples/slurm-configs/basic/02-single-node-multi-gpu.json
index a0e5b6ae..0763127c 100644
--- a/examples/slurm-configs/basic/02-single-node-multi-gpu.json
+++ b/examples/slurm-configs/basic/02-single-node-multi-gpu.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for running a model on 8 GPUs on a single SLURM node",
   "_use_case": "Single-node distributed workload, large models requiring multiple GPUs",
   "_note": "Using 'amd-rccl' partition. Change to your cluster's partition name if different.",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -15,19 +15,18 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "backend": "nccl",
     "nnodes": 1,
     "nproc_per_node": 8
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8",
     "NCCL_DEBUG": "WARN"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/03-multi-node-basic.json b/examples/slurm-configs/basic/03-multi-node-basic.json
index 006890a7..e6f96584 100644
--- a/examples/slurm-configs/basic/03-multi-node-basic.json
+++ b/examples/slurm-configs/basic/03-multi-node-basic.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for distributed workload across 2 nodes with 8 GPUs per node (16 GPUs total)",
   "_use_case": "Multi-node distributed execution for large models (training or inference)",
   "_note": "Target is auto-detected as 'slurm' from presence of 'slurm' config section",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -16,7 +16,7 @@
     "exclusive": true,
     "network_interface": "eth0"
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "backend": "nccl",
@@ -24,7 +24,7 @@
     "nnodes": 2,
     "nproc_per_node": 8
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_DEBUG_SUBSYS": "INIT,NET",
@@ -41,7 +41,6 @@
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
     "RCCL_ENABLE_HIPGRAPH": "0"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/04-multi-node-advanced.json b/examples/slurm-configs/basic/04-multi-node-advanced.json
index 1708f078..f25bb934 100644
--- a/examples/slurm-configs/basic/04-multi-node-advanced.json
+++ b/examples/slurm-configs/basic/04-multi-node-advanced.json
@@ -3,10 +3,10 @@
   "_description": "Configuration for large-scale distributed workloads with advanced options",
   "_use_case": "Production-scale multi-node training with custom workspace and results collection",
   "_note": "Using 'amd-rccl' partition. Adjust for your cluster if needed.",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 4,
@@ -25,7 +25,7 @@
       "openmpi/4.1.4"
     ]
   },
-  
+
   "distributed": {
     "launcher": "torchrun",
     "backend": "nccl",
@@ -33,7 +33,7 @@
     "nnodes": 4,
     "nproc_per_node": 8
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "INFO",
     "NCCL_DEBUG_SUBSYS": "INIT,NET",
@@ -53,9 +53,9 @@
     "NCCL_BUFFSIZE": "8388608",
     "NCCL_P2P_LEVEL": "NVL"
   },
-  
+
   "shared_data": "/shared/datasets",
-  
+
   "_notes": {
     "description": "Advanced configuration with InfiniBand, shared storage, and custom SLURM settings",
     "modules": "Load required environment modules before job execution",
@@ -65,7 +65,6 @@
     "shared_workspace": "Shared filesystem for job execution (NFS/Lustre)",
     "shared_data": "Shared dataset location accessible from all nodes"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/05-vllm-single-node.json b/examples/slurm-configs/basic/05-vllm-single-node.json
index 7d77c4df..01d15eb8 100644
--- a/examples/slurm-configs/basic/05-vllm-single-node.json
+++ b/examples/slurm-configs/basic/05-vllm-single-node.json
@@ -3,10 +3,10 @@
   "_description": "vLLM inference with tensor parallelism on single node",
   "_use_case": "High-throughput LLM inference on single node with multiple GPUs",
   "_note": "vLLM uses tensor parallelism to split model across GPUs",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -15,15 +15,15 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "pre_scripts": [],
-  
+
   "env_vars": {
     "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "VLLM_USE_MODELSCOPE": "False",
@@ -34,4 +34,3 @@
     "NCCL_DEBUG": "WARN"
   }
 }
-
diff --git a/examples/slurm-configs/basic/06-vllm-multi-node.json b/examples/slurm-configs/basic/06-vllm-multi-node.json
index d51262db..3763b0ad 100644
--- a/examples/slurm-configs/basic/06-vllm-multi-node.json
+++ b/examples/slurm-configs/basic/06-vllm-multi-node.json
@@ -10,10 +10,10 @@
     "Better throughput - parallel processing",
     "Ideal for benchmarking and production serving"
   ],
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -21,13 +21,13 @@
     "time": "00:45:00",
     "output_dir": "./slurm_results",
     "exclusive": true,
-    
+
     "_comment_node_check": "Preflight GPU health check (helps avoid OOM from stale processes)",
     "enable_node_check": true,
     "auto_cleanup_nodes": false,
     "verbose_node_check": false
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 2,
@@ -36,25 +36,25 @@
     "port": 29500,
     "_note": "Data Parallelism: Each node runs independently, no cross-node communication needed"
   },
-  
+
   "pre_scripts": [],
-  
+
   "env_vars": {
     "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "VLLM_USE_MODELSCOPE": "False",
     "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
-    
+
     "_comment_memory": "Higher GPU utilization for Data Parallelism (no PP overhead)",
     "VLLM_KV_CACHE_SIZE": "0.8",
     "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
     "HSA_FORCE_FINE_GRAIN_PCIE": "1",
-    
+
     "_comment_timeouts": "Reduced timeouts for faster failure detection in DP mode",
     "NCCL_TIMEOUT": "300",
     "VLLM_ENGINE_ITERATION_TIMEOUT_S": "120",
     "RAY_health_check_timeout_ms": "30000",
     "RAY_gcs_rpc_server_reconnect_timeout_s": "60",
-    
+
     "_comment_nccl": "NCCL settings for within-node tensor parallelism",
     "NCCL_DEBUG": "WARN",
     "NCCL_DEBUG_SUBSYS": "INIT,NET",
@@ -63,4 +63,3 @@
     "TORCH_NCCL_HIGH_PRIORITY": "1"
   }
 }
-
diff --git a/examples/slurm-configs/basic/07-sglang-single-node.json b/examples/slurm-configs/basic/07-sglang-single-node.json
index 8aaae928..1acef300 100644
--- a/examples/slurm-configs/basic/07-sglang-single-node.json
+++ b/examples/slurm-configs/basic/07-sglang-single-node.json
@@ -3,10 +3,10 @@
   "_description": "SGLang inference with tensor parallelism on single node",
   "_use_case": "High-throughput LLM inference on single node with multiple GPUs",
   "_note": "SGLang uses tensor parallelism to split model across GPUs",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -15,13 +15,13 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "env_vars": {
     "SGLANG_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "SGLANG_USE_MODELSCOPE": "False",
@@ -38,4 +38,3 @@
     "RAY_BACKEND_LOG_LEVEL": "warning"
   }
 }
-
diff --git a/examples/slurm-configs/basic/08-sglang-multi-node.json b/examples/slurm-configs/basic/08-sglang-multi-node.json
index 8485b93c..8db86870 100644
--- a/examples/slurm-configs/basic/08-sglang-multi-node.json
+++ b/examples/slurm-configs/basic/08-sglang-multi-node.json
@@ -3,10 +3,10 @@
   "_description": "SGLang inference with tensor + data parallelism across nodes",
   "_use_case": "High-throughput LLM inference requiring multiple nodes",
   "_note": "SGLang uses tensor parallelism within nodes and data parallelism across nodes",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -15,7 +15,7 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 2,
@@ -23,7 +23,7 @@
     "backend": "nccl",
     "port": 29500
   },
-  
+
   "env_vars": {
     "SGLANG_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "SGLANG_USE_MODELSCOPE": "False",
@@ -44,4 +44,3 @@
     "RAY_BACKEND_LOG_LEVEL": "warning"
   }
 }
-
diff --git a/examples/slurm-configs/basic/09-megatron-lm-multi-node.json b/examples/slurm-configs/basic/09-megatron-lm-multi-node.json
index 84e3c3f6..b072bb71 100644
--- a/examples/slurm-configs/basic/09-megatron-lm-multi-node.json
+++ b/examples/slurm-configs/basic/09-megatron-lm-multi-node.json
@@ -3,10 +3,10 @@
   "_description": "Large-scale transformer training with Megatron-LM on SLURM",
   "_use_case": "Multi-node Megatron-LM training with tensor and pipeline parallelism",
   "_reference": "https://github.com/NVIDIA/Megatron-LM",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "gpu",
     "account": "research",
@@ -15,20 +15,19 @@
     "time": "24:00:00",
     "mem": "256G"
   },
-  
+
   "distributed": {
     "launcher": "megatron",
     "nnodes": 4,
     "nproc_per_node": 8,
     "master_port": 29500
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "16",
     "NCCL_DEBUG": "INFO",
     "NCCL_IB_DISABLE": "0"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/cluster-amd-rccl.json b/examples/slurm-configs/basic/cluster-amd-rccl.json
index e70f8721..5cf0c623 100644
--- a/examples/slurm-configs/basic/cluster-amd-rccl.json
+++ b/examples/slurm-configs/basic/cluster-amd-rccl.json
@@ -8,10 +8,10 @@
     "default_qos": "normal",
     "discovery_command": "sinfo -o '%P %.5a %.10l %.6D %.6t %N %G'"
   },
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "account": "amd-rccl",
@@ -22,12 +22,12 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "backend": "nccl",
     "port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_SOCKET_IFNAME": "eth0",
@@ -35,7 +35,6 @@
     "MIOPEN_FIND_MODE": "1",
     "MIOPEN_USER_DB_PATH": "/tmp/.miopen"
   },
-  
+
   "debug": false
 }
-
diff --git a/examples/slurm-configs/basic/sglang-disagg-custom-split.json b/examples/slurm-configs/basic/sglang-disagg-custom-split.json
index f38bcf64..83be85ef 100644
--- a/examples/slurm-configs/basic/sglang-disagg-custom-split.json
+++ b/examples/slurm-configs/basic/sglang-disagg-custom-split.json
@@ -11,10 +11,10 @@
     "custom_override": "4 prefill + 2 decode (4/2 split)"
   },
   "_note": "Custom split allows optimization for prompt-heavy workloads",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 7,
@@ -23,7 +23,7 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 7,
@@ -35,7 +35,7 @@
       "decode_nodes": 2
     }
   },
-  
+
   "env_vars": {
     "SGLANG_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "SGLANG_USE_MODELSCOPE": "False",
@@ -56,4 +56,3 @@
     "RAY_BACKEND_LOG_LEVEL": "warning"
   }
 }
-
diff --git a/examples/slurm-configs/basic/sglang-disagg-multi-node.json b/examples/slurm-configs/basic/sglang-disagg-multi-node.json
index 7dfbae19..4b24314a 100644
--- a/examples/slurm-configs/basic/sglang-disagg-multi-node.json
+++ b/examples/slurm-configs/basic/sglang-disagg-multi-node.json
@@ -10,10 +10,10 @@
     "tensor_parallel": "8 GPUs per node"
   },
   "_note": "SGLang Disaggregated separates prefill and decode into specialized clusters connected via Mooncake",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 5,
@@ -22,7 +22,7 @@
     "output_dir": "./slurm_results",
     "exclusive": true
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 5,
@@ -30,7 +30,7 @@
     "backend": "nccl",
     "port": 29500
   },
-  
+
   "env_vars": {
     "SGLANG_ALLOW_LONG_MAX_MODEL_LEN": "1",
     "SGLANG_USE_MODELSCOPE": "False",
@@ -53,4 +53,3 @@
     "MOONCAKE_TEST_MODE": "0"
   }
 }
-
diff --git a/examples/slurm-configs/minimal/deepspeed-minimal.json b/examples/slurm-configs/minimal/deepspeed-minimal.json
index ae105389..a92287a3 100644
--- a/examples/slurm-configs/minimal/deepspeed-minimal.json
+++ b/examples/slurm-configs/minimal/deepspeed-minimal.json
@@ -2,23 +2,23 @@
   "_comment": "DeepSpeed Config - Uses deepspeed launcher",
   "_description": "DeepSpeed with ZeRO-1 optimization",
   "_use_case": "Test DeepSpeed distributed training on SLURM (training-specific launcher)",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
     "gpus_per_node": 4,
     "time": "02:00:00"
   },
-  
+
   "distributed": {
     "launcher": "deepspeed",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "env_vars": {
     "DEEPSPEED_LAUNCHER": "deepspeed"
   }
diff --git a/examples/slurm-configs/minimal/megatron-lm-minimal.json b/examples/slurm-configs/minimal/megatron-lm-minimal.json
index 9480359e..828daee9 100644
--- a/examples/slurm-configs/minimal/megatron-lm-minimal.json
+++ b/examples/slurm-configs/minimal/megatron-lm-minimal.json
@@ -2,23 +2,23 @@
   "_comment": "Megatron-LM Minimal Config - Dedicated launcher support",
   "_description": "Megatron-LM with automated tensor/pipeline parallelism setup",
   "_use_case": "Large-scale transformer training with Megatron-LM on SLURM",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
     "gpus_per_node": 2,
     "time": "02:00:00"
   },
-  
+
   "distributed": {
     "launcher": "megatron",
     "nnodes": 1,
     "nproc_per_node": 2
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   }
diff --git a/examples/slurm-configs/minimal/sglang-disagg-minimal.json b/examples/slurm-configs/minimal/sglang-disagg-minimal.json
index ee4ad9f2..03bce46c 100644
--- a/examples/slurm-configs/minimal/sglang-disagg-minimal.json
+++ b/examples/slurm-configs/minimal/sglang-disagg-minimal.json
@@ -2,21 +2,20 @@
   "_comment": "Minimal SGLang Disaggregated configuration - 3 nodes minimum",
   "_description": "SGLang disaggregated inference with 3 nodes (1 proxy + 1 prefill + 1 decode)",
   "_architecture": "Node 0: Proxy, Node 1: Prefill, Node 2: Decode",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-    
+
   "slurm": {
     "partition": "gpu",
     "nodes": 3,
     "gpus_per_node": 1,
     "time": "02:00:00"
   },
-  
+
   "distributed": {
     "launcher": "sglang-disagg",
     "nnodes": 3,
     "nproc_per_node": 1
   }
 }
-
diff --git a/examples/slurm-configs/minimal/sglang-multi-node-minimal.json b/examples/slurm-configs/minimal/sglang-multi-node-minimal.json
index 057b5004..7c318d19 100644
--- a/examples/slurm-configs/minimal/sglang-multi-node-minimal.json
+++ b/examples/slurm-configs/minimal/sglang-multi-node-minimal.json
@@ -1,21 +1,20 @@
 {
   "_comment": "Minimal SGLang multi-node configuration",
   "_description": "SGLang inference with 2 nodes, 4 GPUs per node",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-    
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
     "gpus_per_node": 4,
     "time": "04:00:00"
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 2,
     "nproc_per_node": 4
   }
 }
-
diff --git a/examples/slurm-configs/minimal/sglang-single-node-minimal.json b/examples/slurm-configs/minimal/sglang-single-node-minimal.json
index 7e2eae97..b0b306dc 100644
--- a/examples/slurm-configs/minimal/sglang-single-node-minimal.json
+++ b/examples/slurm-configs/minimal/sglang-single-node-minimal.json
@@ -1,21 +1,20 @@
 {
   "_comment": "Minimal SGLang single-node configuration",
   "_description": "SGLang inference with 4 GPUs tensor parallelism",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
     "gpus_per_node": 4,
     "time": "02:00:00"
   },
-  
+
   "distributed": {
     "launcher": "sglang",
     "nnodes": 1,
     "nproc_per_node": 4
   }
 }
-
diff --git a/examples/slurm-configs/minimal/torchrun-multi-gpu-minimal.json b/examples/slurm-configs/minimal/torchrun-multi-gpu-minimal.json
index c8479d58..721b47e6 100644
--- a/examples/slurm-configs/minimal/torchrun-multi-gpu-minimal.json
+++ b/examples/slurm-configs/minimal/torchrun-multi-gpu-minimal.json
@@ -1,7 +1,7 @@
 {
   "_comment": "Minimal multi-GPU SLURM configuration (8 GPUs, single node)",
   "_note": "Using 'amd-rccl' partition (default for this cluster)",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
 
@@ -14,4 +14,3 @@
     "launcher": "torchrun"
   }
 }
-
diff --git a/examples/slurm-configs/minimal/torchrun-multi-node-minimal.json b/examples/slurm-configs/minimal/torchrun-multi-node-minimal.json
index e00262bf..379a0012 100644
--- a/examples/slurm-configs/minimal/torchrun-multi-node-minimal.json
+++ b/examples/slurm-configs/minimal/torchrun-multi-node-minimal.json
@@ -4,7 +4,7 @@
 
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -15,4 +15,3 @@
     "launcher": "torchrun"
   }
 }
-
diff --git a/examples/slurm-configs/minimal/torchrun-single-gpu-minimal.json b/examples/slurm-configs/minimal/torchrun-single-gpu-minimal.json
index 4151f94a..fafc7d86 100644
--- a/examples/slurm-configs/minimal/torchrun-single-gpu-minimal.json
+++ b/examples/slurm-configs/minimal/torchrun-single-gpu-minimal.json
@@ -4,11 +4,10 @@
 
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "gpus_per_node": 1,
     "time": "01:00:00"
   }
 }
-
diff --git a/examples/slurm-configs/minimal/torchtitan-multi-node-minimal.json b/examples/slurm-configs/minimal/torchtitan-multi-node-minimal.json
index 0b227a99..9a3dba5f 100644
--- a/examples/slurm-configs/minimal/torchtitan-multi-node-minimal.json
+++ b/examples/slurm-configs/minimal/torchtitan-multi-node-minimal.json
@@ -5,7 +5,7 @@
 
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 4,
@@ -14,11 +14,10 @@
     "mem": "512G",
     "constraint": "MI300X"
   },
-  
+
   "distributed": {
     "launcher": "torchtitan",
     "nnodes": 4,
     "nproc_per_node": 8
   }
 }
-
diff --git a/examples/slurm-configs/minimal/torchtitan-single-node-minimal.json b/examples/slurm-configs/minimal/torchtitan-single-node-minimal.json
index 4b7f532a..4ed9c837 100644
--- a/examples/slurm-configs/minimal/torchtitan-single-node-minimal.json
+++ b/examples/slurm-configs/minimal/torchtitan-single-node-minimal.json
@@ -5,7 +5,7 @@
 
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -13,9 +13,8 @@
     "time": "24:00:00",
     "mem": "256G"
   },
-  
+
   "distributed": {
     "launcher": "torchtitan"
   }
 }
-
diff --git a/examples/slurm-configs/minimal/vllm-multi-node-minimal.json b/examples/slurm-configs/minimal/vllm-multi-node-minimal.json
index 0a77b5ea..ff970f6f 100644
--- a/examples/slurm-configs/minimal/vllm-multi-node-minimal.json
+++ b/examples/slurm-configs/minimal/vllm-multi-node-minimal.json
@@ -1,10 +1,10 @@
 {
   "_comment": "Minimal vLLM multi-node configuration",
   "_description": "vLLM inference with 2 nodes, 4 GPUs per node",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 2,
@@ -13,13 +13,13 @@
     "enable_node_check": true,
     "auto_cleanup_nodes": false
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 2,
     "nproc_per_node": 4
   },
-  
+
   "env_vars": {
     "VLLM_KV_CACHE_SIZE": "0.5",
     "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
@@ -27,7 +27,6 @@
     "VLLM_ENGINE_ITERATION_TIMEOUT_S": "180",
     "RAY_health_check_timeout_ms": "60000"
   },
-  
+
   "pre_scripts": []
 }
-
diff --git a/examples/slurm-configs/minimal/vllm-single-node-minimal.json b/examples/slurm-configs/minimal/vllm-single-node-minimal.json
index 14c9b843..2072ac36 100644
--- a/examples/slurm-configs/minimal/vllm-single-node-minimal.json
+++ b/examples/slurm-configs/minimal/vllm-single-node-minimal.json
@@ -4,25 +4,24 @@
 
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
     "gpus_per_node": 4,
     "time": "02:00:00"
   },
-  
+
   "distributed": {
     "launcher": "vllm",
     "nnodes": 1,
     "nproc_per_node": 4
   },
-  
+
   "env_vars": {
     "VLLM_KV_CACHE_SIZE": "0.7",
     "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"
   },
-  
+
   "pre_scripts": []
 }
-
diff --git a/pyproject.toml b/pyproject.toml
index 0c83f30a..97be986d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,8 @@ dependencies = [
   "click>=8.0.0",
   "jinja2>=3.0.0",
   "pyyaml>=6.0",
+  "hydra-core>=1.3",
+  "omegaconf>=2.3",
 ]
 classifiers = [
   "Programming Language :: Python :: 3",
@@ -80,6 +82,7 @@ all = [
 [tool.hatch.build.targets.wheel.force-include]
 "src/madengine/scripts" = "madengine/scripts"
 "src/madengine/deployment/templates" = "madengine/deployment/templates"
+"src/madengine/configs" = "madengine/configs"
 
 [tool.hatch.version]
 source = "versioningit"
diff --git a/setup.py b/setup.py
index dab8c8c4..7a6bac74 100644
--- a/setup.py
+++ b/setup.py
@@ -27,21 +27,23 @@
     print("Install it using: pip install setuptools")
     sys.exit(1)
 
+
 def read_readme(readme_file="README.md"):
     """Read README.md file for long description."""
     readme_path = Path(__file__).parent / readme_file
     if readme_path.exists():
         with open(readme_path, "r", encoding="utf-8") as f:
             return f.read()
-    
+
     # Fallback to README.md if specified file doesn't exist
     fallback_path = Path(__file__).parent / "README.md"
     if fallback_path.exists() and readme_file != "README.md":
         with open(fallback_path, "r", encoding="utf-8") as f:
             return f.read()
-    
+
     return ""
 
+
 def get_config_from_pyproject():
     """Read configuration from pyproject.toml."""
     try:
@@ -52,30 +54,32 @@ def get_config_from_pyproject():
         except ImportError:
             try:
                 import toml as tomllib_alt
+
                 def load(f):
-                    if hasattr(f, 'read'):
+                    if hasattr(f, "read"):
                         content = f.read()
                         if isinstance(content, bytes):
-                            content = content.decode('utf-8')
+                            content = content.decode("utf-8")
                         return tomllib_alt.loads(content)
                     else:
                         return tomllib_alt.load(f)
+
                 tomllib.load = load
             except ImportError:
                 print("Warning: No TOML library found. Using fallback configuration.")
                 return get_fallback_config()
-    
+
     pyproject_path = Path(__file__).parent / "pyproject.toml"
     if not pyproject_path.exists():
         print("Warning: pyproject.toml not found. Using fallback configuration.")
         return get_fallback_config()
-    
+
     try:
         with open(pyproject_path, "rb") as f:
             data = tomllib.load(f)
-        
+
         project = data.get("project", {})
-        
+
         # Extract configuration
         config = {
             "name": project.get("name", "madengine"),
@@ -89,13 +93,14 @@ def load(f):
             "scripts": project.get("scripts", {}),
             "readme": project.get("readme", "README.md"),
         }
-        
+
         return config
-        
+
     except Exception as e:
         print(f"Warning: Could not read pyproject.toml: {e}")
         return get_fallback_config()
 
+
 def get_fallback_config():
     """Fallback configuration if pyproject.toml cannot be read."""
     return {
@@ -103,14 +108,26 @@ def get_fallback_config():
         "description": "MAD Engine is a set of interfaces to run various AI models from public MAD.",
         "authors": [{"name": "Advanced Micro Devices", "email": "mad.support@amd.com"}],
         "dependencies": [
-            "pandas", "GitPython", "jsondiff", "sqlalchemy", "setuptools-rust",
-            "paramiko", "tqdm", "pytest",
-            "typing-extensions", "pymongo", "toml",
+            "pandas",
+            "GitPython",
+            "jsondiff",
+            "sqlalchemy",
+            "setuptools-rust",
+            "paramiko",
+            "tqdm",
+            "pytest",
+            "typing-extensions",
+            "pymongo",
+            "toml",
         ],
         "optional_dependencies": {
             "dev": [
-                "pytest", "pytest-cov", "pytest-xdist", "pytest-timeout",
-                "pytest-mock", "pytest-asyncio",
+                "pytest",
+                "pytest-cov",
+                "pytest-xdist",
+                "pytest-timeout",
+                "pytest-mock",
+                "pytest-asyncio",
             ]
         },
         "requires_python": ">=3.8",
@@ -123,52 +140,58 @@ def get_fallback_config():
             "Homepage": "https://github.com/ROCm/madengine",
             "Issues": "https://github.com/ROCm/madengine/issues",
         },
-        "scripts": {
-            "madengine": "madengine.cli.app:cli_main"
-        },
+        "scripts": {"madengine": "madengine.cli.app:cli_main"},
     }
 
+
 def get_version():
     """Get version from git tags or fallback to a default."""
     try:
         import subprocess
         import re
-        
+
         # Try to get version from git describe first (more accurate)
         try:
             result = subprocess.run(
                 ["git", "describe", "--tags", "--dirty", "--always", "--long"],
-                capture_output=True, text=True, timeout=10, cwd=Path(__file__).parent
+                capture_output=True,
+                text=True,
+                timeout=10,
+                cwd=Path(__file__).parent,
             )
             if result.returncode == 0:
                 version_str = result.stdout.strip()
-                
+
                 # Handle case where there are no tags yet
-                if not version_str or len(version_str.split('-')) < 3:
+                if not version_str or len(version_str.split("-")) < 3:
                     # Try to get just the commit hash
                     result = subprocess.run(
                         ["git", "rev-parse", "--short", "HEAD"],
-                        capture_output=True, text=True, timeout=10, cwd=Path(__file__).parent
+                        capture_output=True,
+                        text=True,
+                        timeout=10,
+                        cwd=Path(__file__).parent,
                     )
                     if result.returncode == 0:
                         commit = result.stdout.strip()
                         # Check if dirty
                         dirty_result = subprocess.run(
                             ["git", "diff-index", "--quiet", "HEAD", "--"],
-                            capture_output=True, cwd=Path(__file__).parent
+                            capture_output=True,
+                            cwd=Path(__file__).parent,
                         )
                         is_dirty = dirty_result.returncode != 0
                         if is_dirty:
                             return f"2.0.0.dev0+g{commit}.dirty"
                         else:
                             return f"2.0.0.dev0+g{commit}"
-                
+
                 # Clean up the version string to be PEP 440 compliant
-                if version_str.startswith('v'):
+                if version_str.startswith("v"):
                     version_str = version_str[1:]
-                
+
                 # Handle patterns like "1.0.0-5-g1234567" or "1.0.0-5-g1234567-dirty"
-                match = re.match(r'^([^-]+)-(\d+)-g([a-f0-9]+)(-dirty)?$', version_str)
+                match = re.match(r"^([^-]+)-(\d+)-g([a-f0-9]+)(-dirty)?$", version_str)
                 if match:
                     base_version, distance, commit, dirty = match.groups()
                     if distance == "0":
@@ -183,40 +206,44 @@ def get_version():
                         if dirty:
                             version_str += ".dirty"
                         return version_str
-                
+
                 # Handle case where we just have a commit hash (no tags)
-                if re.match(r'^[a-f0-9]+(-dirty)?$', version_str):
-                    clean_hash = version_str.replace('-dirty', '')
-                    if '-dirty' in version_str:
+                if re.match(r"^[a-f0-9]+(-dirty)?$", version_str):
+                    clean_hash = version_str.replace("-dirty", "")
+                    if "-dirty" in version_str:
                         return f"2.0.0.dev0+g{clean_hash}.dirty"
                     else:
                         return f"2.0.0.dev0+g{clean_hash}"
-                
+
                 return version_str
-                
+
         except (subprocess.SubprocessError, FileNotFoundError):
             pass
-        
+
         # Fallback to short commit hash
         result = subprocess.run(
             ["git", "rev-parse", "--short", "HEAD"],
-            capture_output=True, text=True, timeout=10, cwd=Path(__file__).parent
+            capture_output=True,
+            text=True,
+            timeout=10,
+            cwd=Path(__file__).parent,
         )
         if result.returncode == 0:
             commit = result.stdout.strip()
             return f"2.0.0.dev0+g{commit}"
-            
+
     except Exception:
         pass
-    
+
     # Final fallback
     return "2.0.0.dev0"
 
+
 def main():
     """Main setup function."""
     try:
         config = get_config_from_pyproject()
-        
+
         # Extract author information
         authors = config.get("authors", [])
         if authors:
@@ -225,42 +252,45 @@ def main():
         else:
             author_name = "Advanced Micro Devices"
             author_email = "mad.support@amd.com"
-        
+
         # Extract scripts/entry points
         scripts = config.get("scripts", {})
         entry_points = {"console_scripts": []}
         for script_name, module_path in scripts.items():
             entry_points["console_scripts"].append(f"{script_name}={module_path}")
-        
+
         # Find all packages
         packages = find_packages(where="src")
         if not packages:
             print("Warning: No packages found in src/ directory")
             # Fallback: look for madengine package specifically
             import os
+
             src_path = Path(__file__).parent / "src"
             if (src_path / "madengine").exists():
                 packages = ["madengine"] + [
                     f"madengine.{name}" for name in find_packages(where="src/madengine")
                 ]
-        
+
         # Setup package data to include scripts
         package_data = {"madengine": ["scripts/**/*"]}
-        
+
         # Check if scripts directory exists and add patterns accordingly
         scripts_path = Path(__file__).parent / "src" / "madengine" / "scripts"
         if scripts_path.exists():
             # Add more specific patterns to ensure all script files are included
-            package_data["madengine"].extend([
-                "scripts/*",
-                "scripts/*/*",
-                "scripts/*/*/*",
-                "scripts/*/*/*/*",
-            ])
-        
+            package_data["madengine"].extend(
+                [
+                    "scripts/*",
+                    "scripts/*/*",
+                    "scripts/*/*/*",
+                    "scripts/*/*/*/*",
+                ]
+            )
+
         # Get version
         version = get_version()
-        
+
         # Setup configuration
         setup_kwargs = {
             "name": config["name"],
@@ -284,24 +314,28 @@ def main():
             "zip_safe": False,
             "platforms": ["any"],
         }
-        
+
         # Remove None values to avoid setuptools warnings
         setup_kwargs = {k: v for k, v in setup_kwargs.items() if v is not None}
-        
+
         # Print some info for debugging
-        if len(sys.argv) > 1 and any(arg in sys.argv for arg in ["--version", "--help", "--help-commands"]):
+        if len(sys.argv) > 1 and any(
+            arg in sys.argv for arg in ["--version", "--help", "--help-commands"]
+        ):
             print(f"madengine version: {version}")
             print(f"Found {len(packages)} packages")
             if entry_points and entry_points["console_scripts"]:
                 print(f"Console scripts: {', '.join(entry_points['console_scripts'])}")
-        
+
         setup(**setup_kwargs)
-        
+
     except Exception as e:
         print(f"Error during setup: {e}")
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
 
+
 if __name__ == "__main__":
     main()
diff --git a/src/madengine/__init__.py b/src/madengine/__init__.py
index f121d08e..91a9ea0d 100644
--- a/src/madengine/__init__.py
+++ b/src/madengine/__init__.py
@@ -11,7 +11,7 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-from importlib.metadata import version, PackageNotFoundError
+from importlib.metadata import PackageNotFoundError, version
 
 try:
     __version__ = version("madengine")
diff --git a/src/madengine/cli/__init__.py b/src/madengine/cli/__init__.py
index 2ac185c2..5c83baa1 100644
--- a/src/madengine/cli/__init__.py
+++ b/src/madengine/cli/__init__.py
@@ -10,26 +10,28 @@
 
 # Import for backward compatibility
 from .app import app, cli_main
-from .constants import ExitCode, VALID_GPU_VENDORS, VALID_GUEST_OS
 from .constants import (
+    DEFAULT_DATA_CONFIG,
     DEFAULT_MANIFEST_FILE,
     DEFAULT_PERF_OUTPUT,
-    DEFAULT_DATA_CONFIG,
-    DEFAULT_TOOLS_CONFIG,
     DEFAULT_TIMEOUT,
+    DEFAULT_TOOLS_CONFIG,
+    VALID_GPU_VENDORS,
+    VALID_GUEST_OS,
+    ExitCode,
 )
 from .utils import (
-    setup_logging,
-    split_comma_separated_tags,
     create_args_namespace,
-    save_summary_with_feedback,
-    display_results_table,
     display_performance_table,
+    display_results_table,
+    save_summary_with_feedback,
+    setup_logging,
+    split_comma_separated_tags,
 )
 from .validators import (
-    validate_additional_context,
     process_batch_manifest,
     process_batch_manifest_entries,
+    validate_additional_context,
 )
 
 __all__ = [
@@ -53,4 +55,3 @@
     "process_batch_manifest",
     "process_batch_manifest_entries",
 ]
-
diff --git a/src/madengine/cli/app.py b/src/madengine/cli/app.py
index 2e761f49..b4871353 100644
--- a/src/madengine/cli/app.py
+++ b/src/madengine/cli/app.py
@@ -8,7 +8,8 @@
 """
 
 import sys
-from importlib.metadata import PackageNotFoundError, version as pkg_version
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as pkg_version
 
 import typer
 from rich.traceback import install
@@ -18,7 +19,7 @@
 except ImportError:
     from typing_extensions import Annotated  # Python 3.8
 
-from .commands import build, run, discover, report_app, database
+from .commands import build, database, discover, report_app, run
 from .constants import ExitCode
 from .utils import console
 
@@ -89,4 +90,3 @@ def cli_main() -> None:
 
 if __name__ == "__main__":
     cli_main()
-
diff --git a/src/madengine/cli/commands/__init__.py b/src/madengine/cli/commands/__init__.py
index f77b432e..c88f662c 100644
--- a/src/madengine/cli/commands/__init__.py
+++ b/src/madengine/cli/commands/__init__.py
@@ -8,10 +8,9 @@
 """
 
 from .build import build
-from .run import run
+from .database import database
 from .discover import discover
 from .report import report_app
-from .database import database
+from .run import run
 
 __all__ = ["build", "run", "discover", "report_app", "database"]
-
diff --git a/src/madengine/cli/commands/build.py b/src/madengine/cli/commands/build.py
index 5b10a65c..f82331c8 100644
--- a/src/madengine/cli/commands/build.py
+++ b/src/madengine/cli/commands/build.py
@@ -29,7 +29,11 @@
     save_summary_with_feedback,
     display_results_table,
 )
-from ..validators import validate_additional_context, process_batch_manifest, process_batch_manifest_entries
+from ..validators import (
+    validate_additional_context,
+    process_batch_manifest,
+    process_batch_manifest_entries,
+)
 
 
 def build(
@@ -40,9 +44,9 @@ def build(
     target_archs: Annotated[
         List[str],
         typer.Option(
-            "--target-archs", 
-            "-a", 
-            help="Target GPU architectures to build for (e.g., gfx908,gfx90a,gfx942). If not specified, builds single image with MAD_SYSTEM_GPU_ARCHITECTURE from additional_context or detected GPU architecture."
+            "--target-archs",
+            "-a",
+            help="Target GPU architectures to build for (e.g., gfx908,gfx90a,gfx942). If not specified, builds single image with MAD_SYSTEM_GPU_ARCHITECTURE from additional_context or detected GPU architecture.",
         ),
     ] = [],
     registry: Annotated[
@@ -69,6 +73,17 @@ def build(
             help="File containing additional context JSON",
         ),
     ] = None,
+    config: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--config",
+            help=(
+                "YAML config file and/or Hydra overrides "
+                "(e.g., --config my_job.yaml, --config scheduler=slurm --config launcher=torchrun). "
+                "Cannot be combined with --additional-context or --additional-context-file."
+            ),
+        ),
+    ] = None,
     clean_docker_cache: Annotated[
         bool,
         typer.Option("--clean-docker-cache", help="Rebuild images without using cache"),
@@ -102,15 +117,47 @@ def build(
     # Process tags to handle comma-separated values
     # Supports both: --tags dummy --tags multi AND --tags dummy,multi
     processed_tags = split_comma_separated_tags(tags)
-    
+
+    # --config is mutually exclusive with --additional-context and --additional-context-file
+    if config:
+        if additional_context and additional_context.strip() not in ("", "{}"):
+            console.print(
+                "[red]Error:[/red] --config cannot be used together with --additional-context. "
+                "Use one or the other.",
+                style="bold",
+            )
+            raise typer.Exit(code=ExitCode.INVALID_ARGS.value)
+        if additional_context_file:
+            console.print(
+                "[red]Error:[/red] --config cannot be used together with --additional-context-file. "
+                "Use one or the other.",
+                style="bold",
+            )
+            raise typer.Exit(code=ExitCode.INVALID_ARGS.value)
+
+        from madengine.config import load_config
+
+        config_ctx, config_meta = load_config(config)
+
+        if not processed_tags and config_meta.get("model", {}).get("tags"):
+            processed_tags = config_meta["model"]["tags"]
+        if not registry and config_meta.get("build", {}).get("registry"):
+            registry = config_meta["build"]["registry"]
+        build_meta = config_meta.get("build", {})
+        if not target_archs and build_meta.get("target_archs"):
+            target_archs = build_meta["target_archs"]
+
+        additional_context = repr(config_ctx)
+        additional_context_file = None
+
     # Validate mutually exclusive options
     if batch_manifest and processed_tags:
         console.print(
             "❌ [bold red]Error: Cannot specify both --batch-manifest and --tags options[/bold red]"
         )
         raise typer.Exit(ExitCode.INVALID_ARGS)
-    
-    if additional_context_file and additional_context!="{}":
+
+    if additional_context_file and additional_context != "{}":
         console.print(
             "❌ [bold red]Error: Cannot specify both --additional-context-file and --additional-context options[/bold red]"
         )
@@ -200,7 +247,7 @@ def build(
             console=console,
         ) as progress:
             task = progress.add_task("Initializing build orchestrator...", total=None)
-            
+
             # Use new BuildOrchestrator
             orchestrator = BuildOrchestrator(args)
             progress.update(task, description="Building models...")
@@ -212,12 +259,12 @@ def build(
                 manifest_output=manifest_output,
                 batch_build_metadata=batch_build_metadata,
             )
-            
+
             # Load build summary for display
-            with open(manifest_output, 'r') as f:
+            with open(manifest_output, "r") as f:
                 manifest = json.load(f)
                 build_summary = manifest.get("summary", {})
-            
+
             progress.update(task, description="Build completed!")
 
         # Handle batch manifest post-processing
@@ -240,7 +287,7 @@ def build(
         # Check results and exit with appropriate code
         failed_builds = len(build_summary.get("failed_builds", []))
         successful_builds = len(build_summary.get("successful_builds", []))
-        
+
         if failed_builds == 0:
             console.print(
                 "🎉 [bold green]All builds completed successfully![/bold green]"
@@ -258,9 +305,7 @@ def build(
             raise typer.Exit(ExitCode.BUILD_FAILURE)  # Non-zero exit for CI/CD
         else:
             # All failed
-            console.print(
-                f"💥 [bold red]All builds failed[/bold red]"
-            )
+            console.print(f"💥 [bold red]All builds failed[/bold red]")
             raise typer.Exit(ExitCode.BUILD_FAILURE)
 
     except typer.Exit:
@@ -268,52 +313,52 @@ def build(
     except BuildError as e:
         # Specific build error handling
         console.print(f"💥 [bold red]Build error: {e}[/bold red]")
-        if hasattr(e, 'suggestions') and e.suggestions:
+        if hasattr(e, "suggestions") and e.suggestions:
             console.print("\n💡 [cyan]Suggestions:[/cyan]")
             for suggestion in e.suggestions:
                 console.print(f"  • {suggestion}")
         raise typer.Exit(ExitCode.BUILD_FAILURE)
-        
+
     except ConfigurationError as e:
         # Configuration errors
         console.print(f"⚙️  [bold red]Configuration error: {e}[/bold red]")
-        if hasattr(e, 'suggestions') and e.suggestions:
+        if hasattr(e, "suggestions") and e.suggestions:
             console.print("\n💡 [cyan]Suggestions:[/cyan]")
             for suggestion in e.suggestions:
                 console.print(f"  • {suggestion}")
         raise typer.Exit(ExitCode.INVALID_ARGS)
-        
+
     except DiscoveryError as e:
         # Model discovery errors
         console.print(f"🔍 [bold red]Discovery error: {e}[/bold red]")
         console.print("💡 Check MODEL_DIR or models.json configuration")
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except KeyboardInterrupt:
         console.print("\n🛑 [yellow]Build cancelled by user[/yellow]")
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except PermissionError as e:
         console.print(f"🔒 [bold red]Permission denied: {e}[/bold red]")
-        console.print("💡 Check file/directory permissions or run with appropriate privileges")
+        console.print(
+            "💡 Check file/directory permissions or run with appropriate privileges"
+        )
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except FileNotFoundError as e:
         console.print(f"📁 [bold red]File not found: {e}[/bold red]")
         console.print("💡 Check that all required files exist")
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except Exception as e:
         console.print(f"💥 [bold red]Unexpected error: {e}[/bold red]")
         if verbose:
             console.print_exception()
-        
+
         from madengine.core.errors import handle_error, create_error_context
+
         context = create_error_context(
-            operation="build",
-            phase="build",
-            component="build_command"
+            operation="build", phase="build", component="build_command"
         )
         handle_error(e, context=context)
         raise typer.Exit(ExitCode.FAILURE)
-
diff --git a/src/madengine/cli/commands/database.py b/src/madengine/cli/commands/database.py
index 8f804e06..2d5b699d 100644
--- a/src/madengine/cli/commands/database.py
+++ b/src/madengine/cli/commands/database.py
@@ -22,7 +22,7 @@
 from madengine.database.mongodb import (
     upload_file_to_mongodb,
     MongoDBConfig,
-    UploadOptions
+    UploadOptions,
 )
 from ..constants import ExitCode
 from ..utils import setup_logging
@@ -33,85 +33,64 @@
 def database(
     file: Annotated[
         str,
-        typer.Option(
-            "--file", "-f",
-            help="Path to file (CSV or JSON, auto-detected)"
-        ),
+        typer.Option("--file", "-f", help="Path to file (CSV or JSON, auto-detected)"),
     ],
     database: Annotated[
         str,
-        typer.Option(
-            "--database", "--db",
-            help="MongoDB database name"
-        ),
+        typer.Option("--database", "--db", help="MongoDB database name"),
     ],
     collection: Annotated[
         str,
-        typer.Option(
-            "--collection", "-c",
-            help="MongoDB collection name"
-        ),
+        typer.Option("--collection", "-c", help="MongoDB collection name"),
     ],
     unique_key: Annotated[
         str,
         typer.Option(
-            "--unique-key", "-k",
-            help="Unique field(s) for deduplication (comma-separated, auto-detected if not specified)"
+            "--unique-key",
+            "-k",
+            help="Unique field(s) for deduplication (comma-separated, auto-detected if not specified)",
         ),
     ] = None,
     batch_size: Annotated[
         int,
-        typer.Option(
-            "--batch-size",
-            help="Batch size for bulk operations"
-        ),
+        typer.Option("--batch-size", help="Batch size for bulk operations"),
     ] = 1000,
     no_upsert: Annotated[
         bool,
         typer.Option(
-            "--no-upsert",
-            help="Insert only (don't update existing documents)"
+            "--no-upsert", help="Insert only (don't update existing documents)"
         ),
     ] = False,
     no_index: Annotated[
         bool,
-        typer.Option(
-            "--no-index",
-            help="Skip automatic index creation"
-        ),
+        typer.Option("--no-index", help="Skip automatic index creation"),
     ] = False,
     dry_run: Annotated[
         bool,
-        typer.Option(
-            "--dry-run",
-            help="Validate without uploading"
-        ),
+        typer.Option("--dry-run", help="Validate without uploading"),
     ] = False,
     verbose: Annotated[
         bool,
-        typer.Option(
-            "--verbose", "-v",
-            help="Verbose output"
-        ),
+        typer.Option("--verbose", "-v", help="Verbose output"),
     ] = False,
 ) -> None:
     """
     💾 Upload CSV or JSON files to MongoDB.
-    
+
     Supports intelligent type preservation, automatic deduplication,
     and bulk operations for optimal performance.
-    
+
     \b
     Examples:
         # Upload JSON with auto-detection
         madengine database -f perf_entry_super.json --db mydb -c perf_super
-        
+
         # Upload CSV with custom unique key
         madengine database -f perf.csv --db test -c results -k model,timestamp
-        
+
         # Dry run to validate
         madengine database -f data.json --db test -c data --dry-run
-        
+
     \b
     Environment Variables:
         MONGO_HOST        MongoDB host (default: localhost)
@@ -119,12 +98,12 @@ def database(
         MONGO_USER        MongoDB username
         MONGO_PASSWORD    MongoDB password
     """
-    
+
     setup_logging(verbose)
-    
+
     # Display configuration
     file_path = Path(file)
-    
+
     console.print(
         Panel(
             f"💾 [bold cyan]MongoDB Upload[/bold cyan]\n\n"
@@ -136,29 +115,29 @@ def database(
             border_style="cyan",
         )
     )
-    
+
     # Validate file exists
     if not file_path.exists():
         console.print(f"❌ [bold red]File not found: {file}[/bold red]")
         raise typer.Exit(ExitCode.FAILURE)
-    
+
     # Prepare configuration
     config = MongoDBConfig.from_env()
-    
+
     # Parse unique fields
     unique_fields = None
     if unique_key:
-        unique_fields = [k.strip() for k in unique_key.split(',')]
-    
+        unique_fields = [k.strip() for k in unique_key.split(",")]
+
     # Prepare options
     options = UploadOptions(
         unique_fields=unique_fields,
         upsert=not no_upsert,
         batch_size=batch_size,
         create_indexes=not no_index,
-        dry_run=dry_run
+        dry_run=dry_run,
     )
-    
+
     try:
         # Perform upload
         result = upload_file_to_mongodb(
@@ -166,13 +145,13 @@ def database(
             database_name=database,
             collection_name=collection,
             config=config,
-            options=options
+            options=options,
         )
-        
+
         # Display results
         console.print()
         result.print_summary()
-        
+
         # Show errors if any
         if result.errors and verbose:
             console.print("\n⚠️  [yellow]Errors:[/yellow]")
@@ -180,15 +159,19 @@ def database(
                 console.print(f"   {i}. {error}")
             if len(result.errors) > 10:
                 console.print(f"   ... and {len(result.errors) - 10} more errors")
-        
+
         # Exit with appropriate code
         if result.status == "success":
             raise typer.Exit(ExitCode.SUCCESS)
         elif result.status == "partial":
-            raise typer.Exit(ExitCode.SUCCESS if result.documents_inserted + result.documents_updated > 0 else ExitCode.FAILURE)
+            raise typer.Exit(
+                ExitCode.SUCCESS
+                if result.documents_inserted + result.documents_updated > 0
+                else ExitCode.FAILURE
+            )
         else:
             raise typer.Exit(ExitCode.FAILURE)
-            
+
     except typer.Exit:
         # Re-raise typer.Exit without catching it
         raise
diff --git a/src/madengine/cli/commands/discover.py b/src/madengine/cli/commands/discover.py
index 867a50e0..c160c3c8 100644
--- a/src/madengine/cli/commands/discover.py
+++ b/src/madengine/cli/commands/discover.py
@@ -18,13 +18,20 @@
 from madengine.utils.discover_models import DiscoverModels
 
 from ..constants import ExitCode
-from ..utils import console, setup_logging, split_comma_separated_tags, create_args_namespace
+from ..utils import (
+    console,
+    setup_logging,
+    split_comma_separated_tags,
+    create_args_namespace,
+)
 
 
 def discover(
     tags: Annotated[
         List[str],
-        typer.Option("--tags", "-t", help="Model tags to discover (can specify multiple)"),
+        typer.Option(
+            "--tags", "-t", help="Model tags to discover (can specify multiple)"
+        ),
     ] = [],
     verbose: Annotated[
         bool, typer.Option("--verbose", "-v", help="Enable verbose logging")
@@ -58,17 +65,18 @@ def discover(
     try:
         # Create args namespace similar to mad.py
         args = create_args_namespace(tags=processed_tags)
-        
+
         # Use DiscoverModels class
         # Note: DiscoverModels prints output directly and returns None
         discover_models_instance = DiscoverModels(args=args)
         result = discover_models_instance.run()
-        
-        console.print("✅ [bold green]Model discovery completed successfully[/bold green]")
+
+        console.print(
+            "✅ [bold green]Model discovery completed successfully[/bold green]"
+        )
 
     except Exception as e:
         console.print(f"💥 [bold red]Model discovery failed: {e}[/bold red]")
         if verbose:
             console.print_exception()
         raise typer.Exit(ExitCode.FAILURE)
-
diff --git a/src/madengine/cli/commands/report.py b/src/madengine/cli/commands/report.py
index 2bd348c0..e73c046e 100644
--- a/src/madengine/cli/commands/report.py
+++ b/src/madengine/cli/commands/report.py
@@ -19,12 +19,11 @@
 except ImportError:
     from typing_extensions import Annotated  # Python 3.8
 
-from madengine.reporting.csv_to_html import ConvertCsvToHtml
 from madengine.reporting.csv_to_email import ConvertCsvToEmail
+from madengine.reporting.csv_to_html import ConvertCsvToHtml
 
 from ..constants import ExitCode
-from ..utils import console, setup_logging, create_args_namespace
-
+from ..utils import console, create_args_namespace, setup_logging
 
 # Create a sub-app for report commands
 report_app = typer.Typer(
@@ -39,10 +38,7 @@
 def to_html(
     csv_file_path: Annotated[
         str,
-        typer.Option(
-            "--csv-file-path",
-            help="Path to the CSV file to convert to HTML"
-        ),
+        typer.Option("--csv-file-path", help="Path to the CSV file to convert to HTML"),
     ],
     verbose: Annotated[
         bool, typer.Option("--verbose", "-v", help="Enable verbose logging")
@@ -50,10 +46,10 @@ def to_html(
 ) -> None:
     """
     📄 Convert a single CSV file to HTML report.
-    
+
     This command converts a CSV file to an HTML table format,
     useful for viewing performance metrics in a web browser.
-    
+
     Examples:
         madengine report to-html --csv-file-path perf_amd.csv
         madengine report to-html --csv-file-path results/perf_mi300.csv
@@ -71,29 +67,37 @@ def to_html(
 
     # Validate input
     if not os.path.exists(csv_file_path):
-        console.print(f"❌ [bold red]Error: CSV file not found: {csv_file_path}[/bold red]")
+        console.print(
+            f"❌ [bold red]Error: CSV file not found: {csv_file_path}[/bold red]"
+        )
         raise typer.Exit(ExitCode.FAILURE)
-    
+
     if not os.path.isfile(csv_file_path):
-        console.print(f"❌ [bold red]Error: Path is not a file: {csv_file_path}[/bold red]")
+        console.print(
+            f"❌ [bold red]Error: Path is not a file: {csv_file_path}[/bold red]"
+        )
         raise typer.Exit(ExitCode.FAILURE)
-    
-    if not csv_file_path.endswith('.csv'):
-        console.print(f"❌ [bold red]Error: File must be a CSV file: {csv_file_path}[/bold red]")
+
+    if not csv_file_path.endswith(".csv"):
+        console.print(
+            f"❌ [bold red]Error: File must be a CSV file: {csv_file_path}[/bold red]"
+        )
         raise typer.Exit(ExitCode.FAILURE)
 
     try:
         # Create args namespace for compatibility with existing code
         args = create_args_namespace(csv_file_path=csv_file_path)
-        
+
         # Use ConvertCsvToHtml class
         converter = ConvertCsvToHtml(args=args)
         result = converter.run()
-        
+
         if result:
             # Determine output file name
-            output_file = str(Path(csv_file_path).with_suffix('.html'))
-            console.print(f"✅ [bold green]Successfully converted to: {output_file}[/bold green]")
+            output_file = str(Path(csv_file_path).with_suffix(".html"))
+            console.print(
+                f"✅ [bold green]Successfully converted to: {output_file}[/bold green]"
+            )
         else:
             console.print("❌ [bold red]Conversion failed[/bold red]")
             raise typer.Exit(ExitCode.FAILURE)
@@ -112,16 +116,12 @@ def to_email(
         typer.Option(
             "--directory",
             "--dir",
-            help="Path to directory containing CSV files to consolidate"
+            help="Path to directory containing CSV files to consolidate",
         ),
     ] = ".",
     output: Annotated[
         str,
-        typer.Option(
-            "--output",
-            "-o",
-            help="Output HTML filename"
-        ),
+        typer.Option("--output", "-o", help="Output HTML filename"),
     ] = "run_results.html",
     verbose: Annotated[
         bool, typer.Option("--verbose", "-v", help="Enable verbose logging")
@@ -129,10 +129,10 @@ def to_email(
 ) -> None:
     """
     📧 Convert all CSV files in a directory to consolidated email-ready HTML report.
-    
+
     This command scans a directory for CSV files and combines them into a single
     HTML report with sections for each CSV file, suitable for email distribution.
-    
+
     Examples:
         madengine report to-email
         madengine report to-email --directory ./results
@@ -152,26 +152,36 @@ def to_email(
 
     # Validate input
     if not os.path.exists(directory):
-        console.print(f"❌ [bold red]Error: Directory not found: {directory}[/bold red]")
+        console.print(
+            f"❌ [bold red]Error: Directory not found: {directory}[/bold red]"
+        )
         raise typer.Exit(ExitCode.FAILURE)
-    
+
     if not os.path.isdir(directory):
-        console.print(f"❌ [bold red]Error: Path is not a directory: {directory}[/bold red]")
-        console.print(f"💡 [cyan]Tip: Use 'to-html' command for single CSV files[/cyan]")
+        console.print(
+            f"❌ [bold red]Error: Path is not a directory: {directory}[/bold red]"
+        )
+        console.print(
+            f"💡 [cyan]Tip: Use 'to-html' command for single CSV files[/cyan]"
+        )
         raise typer.Exit(ExitCode.FAILURE)
 
     try:
         # Create args namespace for compatibility with existing code
         # The old code expects 'csv_file_path' to be the directory
         args = create_args_namespace(csv_file_path=directory, output_file=output)
-        
+
         # Use ConvertCsvToEmail class
         converter = ConvertCsvToEmail(args=args)
         result = converter.run()
-        
+
         if result:
-            output_path = os.path.join(directory, output) if directory != "." else output
-            console.print(f"✅ [bold green]Successfully generated email report: {output_path}[/bold green]")
+            output_path = (
+                os.path.join(directory, output) if directory != "." else output
+            )
+            console.print(
+                f"✅ [bold green]Successfully generated email report: {output_path}[/bold green]"
+            )
         else:
             console.print("⚠️  [yellow]No CSV files found to process[/yellow]")
 
@@ -186,4 +196,3 @@ def to_email(
 def report() -> typer.Typer:
     """Return the report sub-app."""
     return report_app
-
diff --git a/src/madengine/cli/commands/run.py b/src/madengine/cli/commands/run.py
index 0efc46e8..a23ac280 100644
--- a/src/madengine/cli/commands/run.py
+++ b/src/madengine/cli/commands/run.py
@@ -81,6 +81,17 @@ def run(
             help="File containing additional context JSON",
         ),
     ] = None,
+    config: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--config",
+            help=(
+                "YAML config file and/or Hydra overrides "
+                "(e.g., --config my_job.yaml, --config scheduler=slurm --config launcher=torchrun). "
+                "Cannot be combined with --additional-context or --additional-context-file."
+            ),
+        ),
+    ] = None,
     keep_alive: Annotated[
         bool,
         typer.Option("--keep-alive", help="Keep Docker containers alive after run"),
@@ -164,6 +175,39 @@ def run(
     # Process tags to handle comma-separated values
     processed_tags = split_comma_separated_tags(tags)
 
+    # --config is mutually exclusive with --additional-context and --additional-context-file
+    if config:
+        if additional_context and additional_context.strip() not in ("", "{}"):
+            console.print(
+                "[red]Error:[/red] --config cannot be used together with --additional-context. "
+                "Use one or the other.",
+                style="bold",
+            )
+            raise typer.Exit(code=ExitCode.INVALID_ARGS.value)
+        if additional_context_file:
+            console.print(
+                "[red]Error:[/red] --config cannot be used together with --additional-context-file. "
+                "Use one or the other.",
+                style="bold",
+            )
+            raise typer.Exit(code=ExitCode.INVALID_ARGS.value)
+
+        from madengine.config import load_config
+
+        config_ctx, config_meta = load_config(config)
+
+        if not processed_tags and config_meta.get("model", {}).get("tags"):
+            processed_tags = config_meta["model"]["tags"]
+        if timeout == DEFAULT_TIMEOUT and config_meta.get("model", {}).get("timeout"):
+            timeout = config_meta["model"]["timeout"]
+        if not manifest_file and config_meta.get("model", {}).get("manifest_file"):
+            manifest_file = config_meta["model"]["manifest_file"]
+        if not registry and config_meta.get("build", {}).get("registry"):
+            registry = config_meta["build"]["registry"]
+
+        additional_context = repr(config_ctx)
+        additional_context_file = None
+
     # Input validation
     if timeout < -1:
         console.print(
@@ -247,7 +291,7 @@ def run(
                 task = progress.add_task(
                     "Initializing execution orchestrator...", total=None
                 )
-                
+
                 # Use new RunOrchestrator
                 orchestrator = RunOrchestrator(args)
                 progress.update(task, description="Running models...")
@@ -262,23 +306,29 @@ def run(
 
             # Display results summary
             display_results_table(execution_summary, "Execution Results")
-            
+
             # Display detailed performance metrics from CSV (show all historical runs, mark current ones)
             perf_csv_path = getattr(args, "output", DEFAULT_PERF_OUTPUT)
             session_start_row = execution_summary.get("session_start_row")
             display_performance_table(perf_csv_path, session_start_row)
-            
+
             # Cleanup session marker AFTER display (so display functions can use it)
             from madengine.utils.session_tracker import SessionTracker
+
             tracker = SessionTracker(perf_csv_path)
             tracker.cleanup_marker()
-            
+
             # Cleanup intermediate perf files if requested
             if cleanup_perf:
-                from madengine.utils.perf_cleanup import cleanup_perf_intermediates as do_cleanup
-                console.print("\n🧹 [cyan]Cleaning up intermediate performance files...[/cyan]")
+                from madengine.utils.perf_cleanup import (
+                    cleanup_perf_intermediates as do_cleanup,
+                )
+
+                console.print(
+                    "\n🧹 [cyan]Cleaning up intermediate performance files...[/cyan]"
+                )
                 do_cleanup()
-            
+
             save_summary_with_feedback(execution_summary, summary_output, "Execution")
 
             failed_runs = len(execution_summary.get("failed_runs", []))
@@ -351,10 +401,10 @@ def run(
                 task = progress.add_task(
                     "Initializing workflow orchestrator...", total=None
                 )
-                
+
                 # Use new RunOrchestrator (handles build+run automatically when tags provided)
                 orchestrator = RunOrchestrator(args)
-                
+
                 progress.update(task, description="Building and running models...")
                 execution_summary = orchestrator.execute(
                     manifest_file=None,  # Triggers build phase
@@ -365,7 +415,7 @@ def run(
                 progress.update(task, description="Workflow completed!")
 
             # Load build summary from generated manifest
-            with open(manifest_output, 'r') as f:
+            with open(manifest_output, "r") as f:
                 manifest = json.load(f)
                 build_summary = manifest.get("summary", {})
 
@@ -382,23 +432,29 @@ def run(
             # Display results
             display_results_table(build_summary, "Build Results")
             display_results_table(execution_summary, "Execution Results")
-            
+
             # Display detailed performance metrics from CSV (show all historical runs, mark current ones)
             perf_csv_path = getattr(args, "output", DEFAULT_PERF_OUTPUT)
             session_start_row = execution_summary.get("session_start_row")
             display_performance_table(perf_csv_path, session_start_row)
-            
+
             # Cleanup session marker AFTER display (so display functions can use it)
             from madengine.utils.session_tracker import SessionTracker
+
             tracker = SessionTracker(perf_csv_path)
             tracker.cleanup_marker()
-            
+
             # Cleanup intermediate perf files if requested
             if cleanup_perf:
-                from madengine.utils.perf_cleanup import cleanup_perf_intermediates as do_cleanup
-                console.print("\n🧹 [cyan]Cleaning up intermediate performance files...[/cyan]")
+                from madengine.utils.perf_cleanup import (
+                    cleanup_perf_intermediates as do_cleanup,
+                )
+
+                console.print(
+                    "\n🧹 [cyan]Cleaning up intermediate performance files...[/cyan]"
+                )
                 do_cleanup()
-            
+
             save_summary_with_feedback(workflow_summary, summary_output, "Workflow")
 
             if workflow_summary["overall_success"]:
@@ -435,41 +491,39 @@ def run(
     except ExecutionError as e:
         # Runtime execution errors
         console.print(f"💥 [bold red]Runtime error: {e}[/bold red]")
-        if hasattr(e, 'suggestions') and e.suggestions:
+        if hasattr(e, "suggestions") and e.suggestions:
             console.print("\n💡 [cyan]Suggestions:[/cyan]")
             for suggestion in e.suggestions:
                 console.print(f"  • {suggestion}")
         raise typer.Exit(ExitCode.RUN_FAILURE)
-        
+
     except ConfigurationError as e:
         # Configuration errors
         console.print(f"⚙️  [bold red]Configuration error: {e}[/bold red]")
-        if hasattr(e, 'suggestions') and e.suggestions:
+        if hasattr(e, "suggestions") and e.suggestions:
             console.print("\n💡 [cyan]Suggestions:[/cyan]")
             for suggestion in e.suggestions:
                 console.print(f"  • {suggestion}")
         raise typer.Exit(ExitCode.INVALID_ARGS)
-        
+
     except KeyboardInterrupt:
         console.print("\n🛑 [yellow]Run cancelled by user[/yellow]")
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except FileNotFoundError as e:
         console.print(f"📁 [bold red]File not found: {e}[/bold red]")
         console.print("💡 Check manifest file path and required files")
         raise typer.Exit(ExitCode.FAILURE)
-        
+
     except Exception as e:
         console.print(f"💥 [bold red]Run process failed: {e}[/bold red]")
         if verbose:
             console.print_exception()
-        
+
         from madengine.core.errors import handle_error, create_error_context
+
         context = create_error_context(
-            operation="run",
-            phase="run",
-            component="run_command"
+            operation="run", phase="run", component="run_command"
         )
         handle_error(e, context=context)
         raise typer.Exit(ExitCode.FAILURE)
-
diff --git a/src/madengine/cli/constants.py b/src/madengine/cli/constants.py
index b437fa30..e80eb5c1 100644
--- a/src/madengine/cli/constants.py
+++ b/src/madengine/cli/constants.py
@@ -29,4 +29,3 @@ class ExitCode(IntEnum):
 DEFAULT_DATA_CONFIG = "data.json"
 DEFAULT_TOOLS_CONFIG = "./scripts/common/tools.json"
 DEFAULT_TIMEOUT = -1
-
diff --git a/src/madengine/cli/utils.py b/src/madengine/cli/utils.py
index 75e026b7..d7750497 100644
--- a/src/madengine/cli/utils.py
+++ b/src/madengine/cli/utils.py
@@ -50,26 +50,26 @@ def setup_logging(verbose: bool = False) -> None:
 
 def split_comma_separated_tags(tags: List[str]) -> List[str]:
     """Split comma-separated tags into individual tags.
-    
+
     Handles both formats:
     - Multiple flags: --tags dummy --tags multi → ['dummy', 'multi']
     - Comma-separated: --tags dummy,multi → ['dummy', 'multi']
-    
+
     Args:
         tags: List of tag strings (may contain comma-separated values)
-        
+
     Returns:
         List of individual tag strings
     """
     if not tags:
         return []
-    
+
     processed_tags = []
     for tag in tags:
         # Split by comma and strip whitespace
-        split_tags = [t.strip() for t in tag.split(',') if t.strip()]
+        split_tags = [t.strip() for t in tag.split(",") if t.strip()]
         processed_tags.extend(split_tags)
-    
+
     return processed_tags
 
 
@@ -100,31 +100,33 @@ def save_summary_with_feedback(
             raise typer.Exit(ExitCode.FAILURE)
 
 
-def display_results_table(summary: Dict, title: str, show_gpu_arch: bool = False) -> None:
+def display_results_table(
+    summary: Dict, title: str, show_gpu_arch: bool = False
+) -> None:
     """
     Display results in a formatted table.
-    
+
     Automatically detects:
     - BUILD results: Simple format (no nodes/performance)
     - RUN results with nodes: Enhanced per-node breakdown
     """
     successful = summary.get("successful_builds", summary.get("successful_runs", []))
     failed = summary.get("failed_builds", summary.get("failed_runs", []))
-    
+
     # Detect if this is a RUN result with per-node data (vs BUILD result)
     has_node_data = False
     for item in successful + failed:
         if isinstance(item, dict) and ("nodes" in item or "perf_data" in item):
             has_node_data = True
             break
-    
+
     # Create table with appropriate columns based on result type
     if has_node_data:
         # RUN results - enhanced format with per-node breakdown
         table = Table(
-            title=f"⚡ {title} (Per-Node Breakdown)", 
-            show_header=True, 
-            header_style="bold magenta"
+            title=f"⚡ {title} (Per-Node Breakdown)",
+            show_header=True,
+            header_style="bold magenta",
         )
         table.add_column("Index", justify="right", style="dim")
         table.add_column("Status", style="bold")
@@ -135,14 +137,12 @@ def display_results_table(summary: Dict, title: str, show_gpu_arch: bool = False
     else:
         # BUILD results - simple format (no per-node data)
         table = Table(
-            title=f"⚡ {title}", 
-            show_header=True, 
-            header_style="bold magenta"
+            title=f"⚡ {title}", show_header=True, header_style="bold magenta"
         )
         table.add_column("Index", justify="right", style="dim")
         table.add_column("Status", style="bold")
         table.add_column("Model", style="cyan")
-    
+
     # Add GPU Architecture column if multi-arch build was used
     if show_gpu_arch:
         table.add_column("GPU Architecture", style="blue")
@@ -151,11 +151,7 @@ def build_gpu_arch_display(item: Dict) -> str:
         """Prefer gpu_architecture (DockerBuilder) then architecture (failures / legacy)."""
         if not isinstance(item, dict):
             return "N/A"
-        return (
-            item.get("gpu_architecture")
-            or item.get("architecture")
-            or "N/A"
-        )
+        return item.get("gpu_architecture") or item.get("architecture") or "N/A"
 
     # Helper function to extract model name from build result
     def extract_model_name(item):
@@ -171,7 +167,11 @@ def extract_model_name(item):
                 docker_image = item["docker_image"]
                 if docker_image.startswith("ci-"):
                     parts = docker_image[3:].split("_")
-                    model_name = parts[0] if len(parts) >= 2 else (parts[0] if parts else docker_image)
+                    model_name = (
+                        parts[0]
+                        if len(parts) >= 2
+                        else (parts[0] if parts else docker_image)
+                    )
                 else:
                     model_name = docker_image
                 return model_name
@@ -189,13 +189,13 @@ def format_number(value):
     # Add successful builds/runs
     row_index = 1
     job_summaries = []  # For final summary line
-    
+
     for item in successful:
         if isinstance(item, dict):
             model_name = extract_model_name(item)
             nodes = item.get("nodes", [])
             perf_data = item.get("perf_data", {})
-            
+
             if has_node_data:
                 # RUN results - show per-node breakdown
                 if not nodes:
@@ -204,49 +204,69 @@ def format_number(value):
                     node_str = "node-0"
                     perf = perf_data.get("performance", "-")
                     metric = perf_data.get("metric", "-")
-                    
-                    row = [str(row_index), status, model_name, node_str, format_number(perf), metric]
+
+                    row = [
+                        str(row_index),
+                        status,
+                        model_name,
+                        node_str,
+                        format_number(perf),
+                        metric,
+                    ]
                     if show_gpu_arch:
                         row.append(perf_data.get("gpu_architecture", "N/A"))
                     table.add_row(*row)
                     row_index += 1
-                    
-                    job_summaries.append({
-                        "model": model_name,
-                        "nodes_succeeded": 1,
-                        "nodes_total": 1,
-                        "aggregated_perf": perf,
-                        "metric": metric
-                    })
+
+                    job_summaries.append(
+                        {
+                            "model": model_name,
+                            "nodes_succeeded": 1,
+                            "nodes_total": 1,
+                            "aggregated_perf": perf,
+                            "metric": metric,
+                        }
+                    )
                 else:
                     # Multi-node - show all nodes
                     aggregated_perf = perf_data.get("performance")
                     aggregated_metric = perf_data.get("metric")
-                    
-                    nodes_succeeded = sum(1 for n in nodes if n.get("status") == "SUCCESS")
-                    
+
+                    nodes_succeeded = sum(
+                        1 for n in nodes if n.get("status") == "SUCCESS"
+                    )
+
                     for node in nodes:
                         status_icon = "✅" if node.get("status") == "SUCCESS" else "❌"
                         status = f"{status_icon} {node.get('status')}"
                         node_str = f"node-{node['node_id']}"
-                        
+
                         # Show node-local performance
                         perf = node.get("performance", "-")
                         metric = node.get("metric", "-")
-                        
-                        row = [str(row_index), status, model_name, node_str, format_number(perf) if perf != "-" else "-", metric if metric else "-"]
+
+                        row = [
+                            str(row_index),
+                            status,
+                            model_name,
+                            node_str,
+                            format_number(perf) if perf != "-" else "-",
+                            metric if metric else "-",
+                        ]
                         if show_gpu_arch:
                             row.append(perf_data.get("gpu_architecture", "N/A"))
                         table.add_row(*row)
                         row_index += 1
-                    
-                    job_summaries.append({
-                        "model": model_name,
-                        "nodes_succeeded": nodes_succeeded,
-                        "nodes_total": len(nodes),
-                        "aggregated_perf": aggregated_perf,
-                        "metric": aggregated_metric
-                    })
+
+                    job_summaries.append(
+                        {
+                            "model": model_name,
+                            "nodes_succeeded": nodes_succeeded,
+                            "nodes_total": len(nodes),
+                            "aggregated_perf": aggregated_perf,
+                            "metric": aggregated_metric,
+                        }
+                    )
             else:
                 # BUILD results - simple format (no node/performance columns)
                 status = "✅ Success"
@@ -272,12 +292,19 @@ def format_number(value):
         if isinstance(item, dict):
             model_name = item.get("model", "Unknown")
             nodes = item.get("nodes", [])
-            
+
             if has_node_data:
                 # RUN results - show per-node failures
                 if not nodes:
                     # Single failure
-                    row = [str(row_index), "❌ Failed", model_name, "node-0", "-", item.get("error", "Unknown")]
+                    row = [
+                        str(row_index),
+                        "❌ Failed",
+                        model_name,
+                        "node-0",
+                        "-",
+                        item.get("error", "Unknown"),
+                    ]
                     if show_gpu_arch:
                         row.append(item.get("architecture", "N/A"))
                     table.add_row(*row)
@@ -289,7 +316,14 @@ def format_number(value):
                         status = f"{status_icon} {node.get('status', 'FAILED')}"
                         node_str = f"node-{node['node_id']}"
                         error = node.get("error", "-")
-                        row = [str(row_index), status, model_name, node_str, "-", error if error else "-"]
+                        row = [
+                            str(row_index),
+                            status,
+                            model_name,
+                            node_str,
+                            "-",
+                            error if error else "-",
+                        ]
                         if show_gpu_arch:
                             row.append("N/A")
                         table.add_row(*row)
@@ -322,7 +356,7 @@ def format_number(value):
         table.add_row(*row)
 
     console.print(table)
-    
+
     # Print job-level summaries for multi-node jobs (RUN results only)
     if has_node_data and job_summaries:
         console.print("\n💡 [bold]Job Summary:[/bold]")
@@ -338,11 +372,13 @@ def format_number(value):
                 )
 
 
-def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row: int = None) -> None:
+def display_performance_table(
+    perf_csv_path: str = "perf.csv", session_start_row: int = None
+) -> None:
     """Display performance metrics from perf.csv file.
-    
+
     Shows all historical runs with visual markers for current session runs.
-    
+
     Args:
         perf_csv_path: Path to the performance CSV file
         session_start_row: Optional row number to filter from (for current session only)
@@ -350,40 +386,40 @@ def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row
     if not os.path.exists(perf_csv_path):
         console.print(f"[yellow]⚠️  Performance CSV not found: {perf_csv_path}[/yellow]")
         return
-    
+
     try:
         import pandas as pd
         from madengine.utils.session_tracker import SessionTracker
-        
+
         # Read CSV file
         df = pd.read_csv(perf_csv_path)
-        
+
         if df.empty:
             console.print("[yellow]⚠️  Performance CSV is empty[/yellow]")
             return
-        
+
         total_rows = len(df)
-        
+
         # Try parameter first, then fall back to marker file
         if session_start_row is None:
-            session_start_row = SessionTracker.load_session_marker_for_csv(perf_csv_path)
-        
+            session_start_row = SessionTracker.load_session_marker_for_csv(
+                perf_csv_path
+            )
+
         # Count current session runs for title
         if session_start_row is not None and session_start_row < total_rows:
             current_run_count = total_rows - session_start_row
             title = f"📊 Performance Results (all {total_rows} runs, {current_run_count} from current session)"
         else:
             title = f"📊 Performance Results (all {total_rows} runs)"
-        
+
         # Create performance table
-        perf_table = Table(
-            title=title,
-            show_header=True,
-            header_style="bold magenta"
-        )
-        
+        perf_table = Table(title=title, show_header=True, header_style="bold magenta")
+
         # Add columns (with "Run" marker column as first column)
-        perf_table.add_column("Run", justify="center", width=4)  # Marker column for current session
+        perf_table.add_column(
+            "Run", justify="center", width=4
+        )  # Marker column for current session
         perf_table.add_column("Index", justify="right", style="dim")
         perf_table.add_column("Model", style="cyan")
         perf_table.add_column("Topology", justify="center", style="blue")
@@ -395,8 +431,8 @@ def display_performance_table(perf_csv_path: str = "perf.csv", session_start_row
         perf_table.add_column("Status", style="bold")
         perf_table.add_column("Duration", justify="right", style="blue", min_width=8)
         perf_table.add_column("Data Name", style="magenta")
-        perf_table.add_column("Data Provider", style="magenta")        
-        
+        perf_table.add_column("Data Provider", style="magenta")
+
         # Helper function to format duration (accepts float seconds or "Xs" string)
         def format_duration(duration):
             if pd.isna(duration) or duration == "" or duration is None:
@@ -414,7 +450,7 @@ def format_duration(duration):
                     return f"{dur/60:.1f}m"
             except (ValueError, TypeError):
                 return str(duration) if duration else "N/A"
-        
+
         # Helper function to format performance
         def format_performance(perf):
             if pd.isna(perf) or perf == "":
@@ -431,48 +467,80 @@ def format_performance(perf):
                     return f"{val:.4g}"
             except (ValueError, TypeError):
                 return str(perf)
-        
+
         # Add rows from dataframe
         for idx, row in df.iterrows():
             # Determine if this is a current session run
-            is_current_run = (session_start_row is not None and idx >= session_start_row)
-            run_marker = "[bold green]➤[/]" if is_current_run else ""  # Arrow marker for current runs
-            
+            is_current_run = session_start_row is not None and idx >= session_start_row
+            run_marker = (
+                "[bold green]➤[/]" if is_current_run else ""
+            )  # Arrow marker for current runs
+
             model_val = row.get("model", "Unknown")
             model = (
                 "Unknown"
-                if (pd.isna(model_val) or model_val == "" or str(model_val).strip() == "nan")
+                if (
+                    pd.isna(model_val)
+                    or model_val == ""
+                    or str(model_val).strip() == "nan"
+                )
                 else str(model_val)
             )
-            dataname = str(row.get("dataname", "")) if not pd.isna(row.get("dataname")) and row.get("dataname") != "" else "N/A"
-            data_provider_type = str(row.get("data_provider_type", "")) if not pd.isna(row.get("data_provider_type")) and row.get("data_provider_type") != "" else "N/A"
-            
+            dataname = (
+                str(row.get("dataname", ""))
+                if not pd.isna(row.get("dataname")) and row.get("dataname") != ""
+                else "N/A"
+            )
+            data_provider_type = (
+                str(row.get("data_provider_type", ""))
+                if not pd.isna(row.get("data_provider_type"))
+                and row.get("data_provider_type") != ""
+                else "N/A"
+            )
+
             # Format topology: Always show "NxG" format for consistency
             # Examples: "1N×1G" (single node, single GPU), "1N×4G" (single node, 4 GPUs), "2N×2G" (2 nodes, 2 GPUs each)
             n_gpus = row.get("n_gpus", 1)
             nnodes = row.get("nnodes", 1)
             gpus_per_node = row.get("gpus_per_node", n_gpus)
-            
+
             # Determine topology display format
             try:
-                nnodes_int = int(nnodes) if not pd.isna(nnodes) and str(nnodes) != "" else 1
-                gpus_per_node_int = int(gpus_per_node) if not pd.isna(gpus_per_node) and str(gpus_per_node) != "" else int(n_gpus) if not pd.isna(n_gpus) else 1
-                
+                nnodes_int = (
+                    int(nnodes) if not pd.isna(nnodes) and str(nnodes) != "" else 1
+                )
+                gpus_per_node_int = (
+                    int(gpus_per_node)
+                    if not pd.isna(gpus_per_node) and str(gpus_per_node) != ""
+                    else int(n_gpus) if not pd.isna(n_gpus) else 1
+                )
+
                 # Always show NxG format for consistency
                 topology = f"{nnodes_int}N×{gpus_per_node_int}G"
             except (ValueError, TypeError):
                 # Fallback if parsing fails
                 topology = "N/A"
-            
+
             # Get launcher value as-is from the CSV (don't default to "docker" here)
-            launcher = str(row.get("launcher", "")) if not pd.isna(row.get("launcher")) and row.get("launcher") != "" else "N/A"
-            deployment_type = str(row.get("deployment_type", "local")) if not pd.isna(row.get("deployment_type")) and row.get("deployment_type") != "" else "local"
+            launcher = (
+                str(row.get("launcher", ""))
+                if not pd.isna(row.get("launcher")) and row.get("launcher") != ""
+                else "N/A"
+            )
+            deployment_type = (
+                str(row.get("deployment_type", "local"))
+                if not pd.isna(row.get("deployment_type"))
+                and row.get("deployment_type") != ""
+                else "local"
+            )
             gpu_arch = str(row.get("gpu_architecture", "N/A"))
             performance = format_performance(row.get("performance", ""))
-            metric = str(row.get("metric", "")) if not pd.isna(row.get("metric")) else ""
-            
+            metric = (
+                str(row.get("metric", "")) if not pd.isna(row.get("metric")) else ""
+            )
+
             status = str(row.get("status", "UNKNOWN"))
-            
+
             # Duration column shows ONLY test/execution time (not build time)
             # If test_duration is missing, show N/A
             test_dur = row.get("test_duration", "")
@@ -480,7 +548,7 @@ def format_performance(perf):
                 duration = format_duration(test_dur)
             else:
                 duration = "N/A"
-            
+
             # Color-code status
             if status == "SUCCESS":
                 status_display = "✅ Success"
@@ -488,13 +556,13 @@ def format_performance(perf):
                 status_display = "❌ Failed"
             else:
                 status_display = f"⚠️  {status}"
-            
+
             perf_table.add_row(
-                run_marker,         # Marker column showing ➤ for current runs
+                run_marker,  # Marker column showing ➤ for current runs
                 str(idx),
                 model,
                 topology,
-                launcher,           # Distributed launcher (docker, torchrun, vllm, etc.)
+                launcher,  # Distributed launcher (docker, torchrun, vllm, etc.)
                 deployment_type,
                 gpu_arch,
                 performance,
@@ -502,24 +570,27 @@ def format_performance(perf):
                 status_display,
                 duration,
                 dataname,
-                data_provider_type
+                data_provider_type,
             )
-        
+
         console.print()  # Add blank line
         console.print(perf_table)
-        
+
         # Print summary statistics
         total_runs = len(df)
         successful_runs = len(df[df["status"] == "SUCCESS"])
         failed_runs = len(df[df["status"] == "FAILURE"])
-        
+
         console.print()
-        console.print(f"[bold]Summary:[/bold] {total_runs} total runs, "
-                     f"[green]{successful_runs} successful[/green], "
-                     f"[red]{failed_runs} failed[/red]")
-        
+        console.print(
+            f"[bold]Summary:[/bold] {total_runs} total runs, "
+            f"[green]{successful_runs} successful[/green], "
+            f"[red]{failed_runs} failed[/red]"
+        )
+
     except ImportError:
-        console.print("[yellow]⚠️  pandas not installed. Install with: pip install pandas[/yellow]")
+        console.print(
+            "[yellow]⚠️  pandas not installed. Install with: pip install pandas[/yellow]"
+        )
     except Exception as e:
         console.print(f"[red]❌ Error reading performance CSV: {e}[/red]")
-
diff --git a/src/madengine/cli/validators.py b/src/madengine/cli/validators.py
index 1f7ee001..820248e0 100644
--- a/src/madengine/cli/validators.py
+++ b/src/madengine/cli/validators.py
@@ -14,22 +14,20 @@
 import typer
 from rich.console import Console
 
-from madengine.utils.discover_models import DiscoverModels
 from madengine.core.additional_context_defaults import (
     DEFAULT_GPU_VENDOR,
     DEFAULT_GUEST_OS,
     apply_build_context_defaults,
 )
-from .constants import ExitCode, VALID_GPU_VENDORS, VALID_GUEST_OS
-from .utils import create_args_namespace
+from madengine.utils.discover_models import DiscoverModels
 
+from .constants import VALID_GPU_VENDORS, VALID_GUEST_OS, ExitCode
+from .utils import create_args_namespace
 
 # Initialize Rich console
 console = Console()
 
-_EXAMPLE_ADDITIONAL_CONTEXT = (
-    '--additional-context \'{"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx942"}}\''
-)
+_EXAMPLE_ADDITIONAL_CONTEXT = '--additional-context \'{"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx942"}}\''
 
 
 def parse_additional_context_cli_string(additional_context: str) -> Dict[str, Any]:
@@ -44,9 +42,7 @@ def parse_additional_context_cli_string(additional_context: str) -> Dict[str, An
         try:
             parsed = ast.literal_eval(additional_context)
         except (ValueError, SyntaxError) as e:
-            console.print(
-                f"❌ Invalid additional_context format: [red]{e}[/red]"
-            )
+            console.print(f"❌ Invalid additional_context format: [red]{e}[/red]")
             console.print(
                 "💡 Use JSON or a Python dict literal, e.g. "
                 + _EXAMPLE_ADDITIONAL_CONTEXT
@@ -194,9 +190,7 @@ def validate_additional_context_structure(context: Dict[str, Any]) -> None:
 
     if "log_error_benign_patterns" in context:
         lebp = context["log_error_benign_patterns"]
-        if not isinstance(lebp, list) or not all(
-            isinstance(x, str) for x in lebp
-        ):
+        if not isinstance(lebp, list) or not all(isinstance(x, str) for x in lebp):
             _fail_structure(
                 "log_error_benign_patterns",
                 "an array of strings",
@@ -204,8 +198,10 @@ def validate_additional_context_structure(context: Dict[str, Any]) -> None:
 
     if "log_error_patterns" in context:
         lep = context["log_error_patterns"]
-        if not isinstance(lep, list) or not lep or not all(
-            isinstance(x, str) for x in lep
+        if (
+            not isinstance(lep, list)
+            or not lep
+            or not all(isinstance(x, str) for x in lep)
         ):
             _fail_structure(
                 "log_error_patterns",
diff --git a/src/madengine/config/__init__.py b/src/madengine/config/__init__.py
new file mode 100644
index 00000000..076a66ea
--- /dev/null
+++ b/src/madengine/config/__init__.py
@@ -0,0 +1,32 @@
+"""Config-driven YAML configuration system for madengine."""
+
+from madengine.config.loader import HydraConfigLoader
+from madengine.config.schema import ConfigValidator
+from madengine.config.translator import ConfigTranslator
+
+
+def load_config(config_args: list) -> tuple:
+    """Compose, validate, and translate a madengine YAML/Hydra configuration.
+
+    Args:
+        config_args: Hydra override strings, optionally mixed with the path
+            of a single user YAML file.
+
+    Returns:
+        Whatever ``ConfigTranslator.to_additional_context`` produces for the
+        composed config: an ``(additional_context, metadata)`` pair of dicts.
+
+    Raises:
+        ConfigurationError: If ``ConfigValidator.validate`` reports any
+            problem; the message lists every problem on its own line.
+    """
+    composed = HydraConfigLoader.load(config_args)
+    problems = ConfigValidator.validate(composed)
+    if not problems:
+        return ConfigTranslator.to_additional_context(composed)
+
+    # Imported lazily: the error type is only needed on the failure path.
+    from madengine.core.errors import ConfigurationError
+
+    bullet_list = "\n".join(f"  - {problem}" for problem in problems)
+    raise ConfigurationError("Config validation errors:\n" + bullet_list)
diff --git a/src/madengine/config/loader.py b/src/madengine/config/loader.py
new file mode 100644
index 00000000..5925065d
--- /dev/null
+++ b/src/madengine/config/loader.py
@@ -0,0 +1,78 @@
+"""Hydra-based config loader using the Compose API."""
+
+import importlib.resources
+import os
+from pathlib import Path
+
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import DictConfig, OmegaConf
+
+from madengine.core.errors import ConfigurationError
+
+
+class HydraConfigLoader:
+    """Composes the packaged madengine config tree through Hydra's Compose API."""
+
+    @staticmethod
+    def load(config_args: list) -> DictConfig:
+        """Compose the base config and layer an optional user YAML on top.
+
+        Args:
+            config_args: Hydra override strings, optionally mixed with the
+                path of one user YAML file.
+
+        Returns:
+            The composed config; when a user YAML was given it has been
+            merged over the Hydra-composed result.
+        """
+        yaml_path, hydra_overrides = HydraConfigLoader._parse_args(config_args)
+
+        # Prefer the "configs" directory of the installed package; fall back
+        # to the one located relative to this source file when it is absent.
+        package_root = Path(importlib.resources.files("madengine"))  # type: ignore[attr-defined]
+        configs_path = str(package_root / "configs")
+        if not os.path.isdir(configs_path):
+            configs_path = str(Path(__file__).parent.parent / "configs")
+
+        # Clear any global Hydra state left over from an earlier initialization.
+        GlobalHydra.instance().clear()
+
+        with initialize_config_dir(config_dir=configs_path, version_base=None):
+            composed = compose(config_name="config", overrides=hydra_overrides)
+
+        if not yaml_path:
+            return composed
+
+        # Struct mode is disabled before merging, presumably so the user file
+        # can add keys missing from the composed config.
+        user_layer = OmegaConf.load(yaml_path)
+        OmegaConf.set_struct(composed, False)
+        return OmegaConf.merge(composed, user_layer)
+
+    @staticmethod
+    def _parse_args(config_args: list) -> tuple:
+        """Split ``config_args`` into ``(user_yaml_path, hydra_overrides)``.
+
+        An argument counts as the user YAML file when it ends in ``.yaml`` or
+        ``.yml``, contains no ``=`` and does not start with ``+``; every other
+        argument is kept, in order, as a Hydra override.
+
+        Raises:
+            ConfigurationError: If more than one argument looks like a YAML file.
+        """
+        yaml_path = None
+        hydra_overrides = []
+        for item in config_args:
+            looks_like_yaml_file = (
+                item.endswith((".yaml", ".yml"))
+                and "=" not in item
+                and not item.startswith("+")
+            )
+            if not looks_like_yaml_file:
+                hydra_overrides.append(item)
+                continue
+            if yaml_path:
+                raise ConfigurationError("Only one YAML config file allowed")
+            yaml_path = item
+        return yaml_path, hydra_overrides
diff --git a/src/madengine/config/schema.py b/src/madengine/config/schema.py
new file mode 100644
index 00000000..358d1c9d
--- /dev/null
+++ b/src/madengine/config/schema.py
@@ -0,0 +1,101 @@
+"""Config validation for composed Hydra configs."""
+
+from omegaconf import DictConfig, OmegaConf
+
+# Top-level keys accepted in a composed config; anything else is reported by
+# ConfigValidator.validate as an unknown key.
+KNOWN_TOP_LEVEL_KEYS = {
+    "defaults",
+    "platform",
+    "scheduler",
+    "hardware",
+    "launcher",
+    "model",
+    "docker",
+    "build",
+    "env_vars",
+    "debug",
+    "live_output",
+    "log_error",
+    "tools",
+    "pre_scripts",
+    "post_scripts",
+    "encapsulate_script",
+    "data_config",
+    "output",
+    "summary_output",
+    "gpu_vendor",
+    "guest_os",
+    "runtime",
+    "slurm",
+    "k8s",
+    "kubernetes",
+    "distributed",
+    "vllm",
+    "sglang_disagg",
+    "shared_data",
+    "timeout",
+    "gpu_type",
+    "gpu_memory_gb",
+    "gpus_per_node",
+    "data",
+}
+
+# Values accepted for ``platform.type``.
+SUPPORTED_PLATFORMS = {"docker"}
+
+
+class ConfigValidator:
+    """Validates composed config for consistency."""
+
+    @staticmethod
+    def validate(cfg: DictConfig) -> list:
+        """Check a composed config and collect every problem found.
+
+        Args:
+            cfg: Composed config. Anything that is not a ``DictConfig`` is
+                treated as an empty config.
+
+        Returns:
+            A list of human-readable error strings; empty when the config is
+            valid.
+        """
+        errors = []
+
+        # NOTE(review): interpolations are left unresolved here, so a value
+        # such as ``nnodes: ${...}`` is seen as a string — confirm intended.
+        raw = (
+            OmegaConf.to_container(cfg, resolve=False)
+            if isinstance(cfg, DictConfig)
+            else {}
+        )
+
+        if raw.get("slurm") and raw.get("k8s"):
+            errors.append("Cannot specify both 'slurm' and 'k8s' sections")
+
+        dist = raw.get("distributed")
+        if isinstance(dist, dict):
+            if dist.get("enabled") and not dist.get("launcher"):
+                errors.append("distributed.enabled=true requires distributed.launcher")
+            nnodes = dist.get("nnodes")
+            if nnodes is not None:
+                # bool is a subclass of int, so ``nnodes: true`` would pass a
+                # plain isinstance(..., int) check; reject it explicitly.
+                is_real_int = isinstance(nnodes, int) and not isinstance(nnodes, bool)
+                if not is_real_int or nnodes < 1:
+                    errors.append("distributed.nnodes must be a positive integer")
+
+        platform = raw.get("platform")
+        if isinstance(platform, dict):
+            ptype = platform.get("type")
+            if ptype and ptype not in SUPPORTED_PLATFORMS:
+                errors.append(
+                    f"Platform '{ptype}' is not yet supported. "
+                    f"Supported: {', '.join(sorted(SUPPORTED_PLATFORMS))}"
+                )
+
+        for key in raw:
+            if key not in KNOWN_TOP_LEVEL_KEYS:
+                errors.append(f"Unknown config key: '{key}'")
+
+        return errors
diff --git a/src/madengine/config/translator.py b/src/madengine/config/translator.py
new file mode 100644
index 00000000..f0448a8e
--- /dev/null
+++ b/src/madengine/config/translator.py
@@ -0,0 +1,75 @@
+"""Translates clean YAML config to internal additional_context format."""
+
+from omegaconf import DictConfig, OmegaConf
+
+
+class ConfigTranslator:
+    """Maps YAML config keys to internal additional_context dict format."""
+
+    # Dotted YAML paths whose internal key is not just "<section>_<subkey>".
+    KEY_MAP = {
+        "docker.build_args": "docker_build_arg",
+        "docker.env_vars": "docker_env_vars",
+        "docker.mounts": "docker_mounts",
+        "docker.gpus": "docker_gpus",
+        "docker.cpus": "docker_cpus",
+        "docker.additional_run_options": "additional_docker_run_options",
+        "log_error.pattern_scan": "log_error_pattern_scan",
+        "log_error.benign_patterns": "log_error_benign_patterns",
+        "log_error.patterns": "log_error_patterns",
+    }
+
+    # Top-level keys that are returned in ``metadata`` rather than the context.
+    EXTRACTED_KEYS = {
+        "model",
+        "build",
+        "platform",
+        "output",
+        "summary_output",
+        "data_config",
+        "live_output",
+    }
+
+    @classmethod
+    def _flatten(cls, section, values, *, skip_none, empty) -> dict:
+        """Flatten one nested section into ``{internal_key: value}``.
+
+        A ``None`` section (e.g. ``docker:`` left empty in YAML) yields ``{}``.
+        Empty ``empty``-typed containers are dropped, as is ``None`` if ``skip_none``.
+        """
+        flat = {}
+        if values is None:
+            return flat
+        for subkey, subval in values.items():
+            if subval is None and skip_none:
+                continue
+            if isinstance(subval, empty) and not subval:
+                continue
+            flat[cls.KEY_MAP.get(f"{section}.{subkey}", f"{section}_{subkey}")] = subval
+        return flat
+
+    @classmethod
+    def to_additional_context(cls, cfg: DictConfig) -> tuple:
+        """Convert DictConfig to (additional_context, metadata) tuple.
+
+        Returns:
+            additional_context: dict in the format expected by existing pipeline.
+            metadata: the EXTRACTED_KEYS sections plus ``runtime``, for the CLI layer.
+        """
+        raw = OmegaConf.to_container(cfg, resolve=True)
+        context = {}
+        metadata = {}
+        for key, value in raw.items():
+            if key in cls.EXTRACTED_KEYS or key == "runtime":
+                metadata[key] = value
+            elif key == "docker":
+                context.update(cls._flatten(key, value, skip_none=True, empty=dict))
+            elif key == "log_error":
+                context.update(cls._flatten(key, value, skip_none=False, empty=list))
+            elif value is not None:
+                context[key] = value
+
+        model = metadata.get("model") or {}
+        if model.get("container_image"):
+            context["MAD_CONTAINER_IMAGE"] = model["container_image"]
+        return context, metadata
diff --git a/src/madengine/configs/.gitkeep b/src/madengine/configs/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/src/madengine/configs/build/ci.yaml b/src/madengine/configs/build/ci.yaml
new file mode 100644
index 00000000..51eccec3
--- /dev/null
+++ b/src/madengine/configs/build/ci.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+docker:
+  clean_cache: true
+
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
diff --git a/src/madengine/configs/build/default.yaml b/src/madengine/configs/build/default.yaml
new file mode 100644
index 00000000..62deae2a
--- /dev/null
+++ b/src/madengine/configs/build/default.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
diff --git a/src/madengine/configs/build/multi_arch.yaml b/src/madengine/configs/build/multi_arch.yaml
new file mode 100644
index 00000000..67001f76
--- /dev/null
+++ b/src/madengine/configs/build/multi_arch.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+build:
+  registry: null
+  target_archs:
+    - gfx942
+    - gfx90a
+    - gfx908
+  manifest_output: build_manifest.json
diff --git a/src/madengine/configs/config.yaml b/src/madengine/configs/config.yaml
new file mode 100644
index 00000000..168647eb
--- /dev/null
+++ b/src/madengine/configs/config.yaml
@@ -0,0 +1,48 @@
+defaults:
+  - platform: docker
+  - scheduler: local
+  - hardware: amd
+  - launcher: none
+  - _self_
+
+model:
+  tags: []
+  manifest_file: null
+  container_image: null
+  skip_run: false
+  timeout: null
+
+docker:
+  build_args: {}
+  env_vars: {}
+  mounts: {}
+  gpus: null
+  cpus: null
+  additional_run_options: null
+  keep_alive: false
+  clean_cache: false
+
+build:
+  registry: null
+  target_archs: []
+  manifest_output: build_manifest.json
+
+env_vars: {}
+
+debug: false
+live_output: false
+
+log_error:
+  pattern_scan: true
+  benign_patterns: []
+  patterns: []
+
+tools: []
+pre_scripts: []
+post_scripts: []
+encapsulate_script: null
+
+data_config: data.json
+
+output: perf.csv
+summary_output: null
diff --git a/src/madengine/configs/data/local.yaml b/src/madengine/configs/data/local.yaml
new file mode 100644
index 00000000..8cdc5955
--- /dev/null
+++ b/src/madengine/configs/data/local.yaml
@@ -0,0 +1,4 @@
+# @package _global_
+data:
+  provider: local
+  path: null
diff --git a/src/madengine/configs/data/minio.yaml b/src/madengine/configs/data/minio.yaml
new file mode 100644
index 00000000..3f6ec625
--- /dev/null
+++ b/src/madengine/configs/data/minio.yaml
@@ -0,0 +1,7 @@
+# @package _global_
+data:
+  provider: minio
+  endpoint: null
+  bucket: null
+  access_key: null
+  secret_key: null
diff --git a/src/madengine/configs/data/nas.yaml b/src/madengine/configs/data/nas.yaml
new file mode 100644
index 00000000..e08c48e4
--- /dev/null
+++ b/src/madengine/configs/data/nas.yaml
@@ -0,0 +1,4 @@
+# @package _global_
+data:
+  provider: nas
+  mount_path: null
diff --git a/src/madengine/configs/data/s3.yaml b/src/madengine/configs/data/s3.yaml
new file mode 100644
index 00000000..a3f8a055
--- /dev/null
+++ b/src/madengine/configs/data/s3.yaml
@@ -0,0 +1,6 @@
+# @package _global_
+data:
+  provider: s3
+  bucket: null
+  prefix: null
+  region: null
diff --git a/src/madengine/configs/env/infiniband.yaml b/src/madengine/configs/env/infiniband.yaml
new file mode 100644
index 00000000..19f87571
--- /dev/null
+++ b/src/madengine/configs/env/infiniband.yaml
@@ -0,0 +1,6 @@
+# @package _global_
+env_vars:
+  NCCL_IB_DISABLE: "0"
+  NCCL_IB_HCA: "mlx5_0:1,mlx5_1:1"
+  NCCL_SOCKET_IFNAME: ib0
+  NCCL_NET_GDR_LEVEL: "3"
diff --git a/src/madengine/configs/env/miopen_defaults.yaml b/src/madengine/configs/env/miopen_defaults.yaml
new file mode 100644
index 00000000..05a87b3e
--- /dev/null
+++ b/src/madengine/configs/env/miopen_defaults.yaml
@@ -0,0 +1,4 @@
+# @package _global_
+env_vars:
+  MIOPEN_FIND_MODE: "1"
+  MIOPEN_USER_DB_PATH: /tmp/.miopen
diff --git a/src/madengine/configs/env/nccl_debug.yaml b/src/madengine/configs/env/nccl_debug.yaml
new file mode 100644
index 00000000..5e171ec0
--- /dev/null
+++ b/src/madengine/configs/env/nccl_debug.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+env_vars:
+  NCCL_DEBUG: INFO
+  NCCL_DEBUG_SUBSYS: "INIT,NET,GRAPH"
+  TORCH_DISTRIBUTED_DEBUG: DETAIL
diff --git a/src/madengine/configs/env/nccl_tuned.yaml b/src/madengine/configs/env/nccl_tuned.yaml
new file mode 100644
index 00000000..3d434949
--- /dev/null
+++ b/src/madengine/configs/env/nccl_tuned.yaml
@@ -0,0 +1,7 @@
+# @package _global_
+env_vars:
+  NCCL_DEBUG: WARN
+  TORCH_NCCL_HIGH_PRIORITY: "1"
+  GPU_MAX_HW_QUEUES: "2"
+  NCCL_TIMEOUT: "600"
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: "1"
diff --git a/src/madengine/configs/hardware/amd.yaml b/src/madengine/configs/hardware/amd.yaml
new file mode 100644
index 00000000..670f6db1
--- /dev/null
+++ b/src/madengine/configs/hardware/amd.yaml
@@ -0,0 +1,18 @@
+# @package _global_
+gpu_vendor: AMD
+guest_os: UBUNTU
+
+runtime:
+  devices:
+    - /dev/kfd
+    - /dev/dri
+    - /dev/infiniband
+  capabilities:
+    - SYS_PTRACE
+  security_opts:
+    - seccomp=unconfined
+  network_mode: host
+  ipc: host
+  groups:
+    - video
+  use_gpu_flag: false
diff --git a/src/madengine/configs/hardware/cpu.yaml b/src/madengine/configs/hardware/cpu.yaml
new file mode 100644
index 00000000..f08463a6
--- /dev/null
+++ b/src/madengine/configs/hardware/cpu.yaml
@@ -0,0 +1,12 @@
+# @package _global_
+gpu_vendor: null
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: null
+  ipc: null
+  groups: []
+  use_gpu_flag: false
diff --git a/src/madengine/configs/hardware/nvidia.yaml b/src/madengine/configs/hardware/nvidia.yaml
new file mode 100644
index 00000000..471d3467
--- /dev/null
+++ b/src/madengine/configs/hardware/nvidia.yaml
@@ -0,0 +1,12 @@
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
diff --git a/src/madengine/configs/launcher/deepspeed.yaml b/src/madengine/configs/launcher/deepspeed.yaml
new file mode 100644
index 00000000..99df001f
--- /dev/null
+++ b/src/madengine/configs/launcher/deepspeed.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: deepspeed
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
diff --git a/src/madengine/configs/launcher/megatron.yaml b/src/madengine/configs/launcher/megatron.yaml
new file mode 100644
index 00000000..0a131248
--- /dev/null
+++ b/src/madengine/configs/launcher/megatron.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun  # NOTE(review): identical to torchtitan.yaml; confirm "megatron" is not intended
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
diff --git a/src/madengine/configs/launcher/native.yaml b/src/madengine/configs/launcher/native.yaml
new file mode 100644
index 00000000..248e1b77
--- /dev/null
+++ b/src/madengine/configs/launcher/native.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: native
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
diff --git a/src/madengine/configs/launcher/none.yaml b/src/madengine/configs/launcher/none.yaml
new file mode 100644
index 00000000..f7e60ebe
--- /dev/null
+++ b/src/madengine/configs/launcher/none.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+distributed:
+  enabled: false
diff --git a/src/madengine/configs/launcher/primus.yaml b/src/madengine/configs/launcher/primus.yaml
new file mode 100644
index 00000000..ed548efe
--- /dev/null
+++ b/src/madengine/configs/launcher/primus.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: primus
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
diff --git a/src/madengine/configs/launcher/sglang.yaml b/src/madengine/configs/launcher/sglang.yaml
new file mode 100644
index 00000000..80509f1f
--- /dev/null
+++ b/src/madengine/configs/launcher/sglang.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: sglang
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  port: 29500
diff --git a/src/madengine/configs/launcher/sglang_disagg.yaml b/src/madengine/configs/launcher/sglang_disagg.yaml
new file mode 100644
index 00000000..77fb212c
--- /dev/null
+++ b/src/madengine/configs/launcher/sglang_disagg.yaml
@@ -0,0 +1,13 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: sglang-disagg
+  backend: nccl
+  nnodes: 3
+  nproc_per_node: 8
+  port: 29500
+
+sglang_disagg:
+  prefill_nodes: null
+  decode_nodes: null
+  transfer_backend: mooncake
diff --git a/src/madengine/configs/launcher/torchrun.yaml b/src/madengine/configs/launcher/torchrun.yaml
new file mode 100644
index 00000000..4e7798f2
--- /dev/null
+++ b/src/madengine/configs/launcher/torchrun.yaml
@@ -0,0 +1,9 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
+  port: 29500
diff --git a/src/madengine/configs/launcher/torchtitan.yaml b/src/madengine/configs/launcher/torchtitan.yaml
new file mode 100644
index 00000000..0a131248
--- /dev/null
+++ b/src/madengine/configs/launcher/torchtitan.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: torchrun  # NOTE(review): identical to megatron.yaml; confirm "torchtitan" is not intended
+  backend: nccl
+  nnodes: 1
+  nproc_per_node: 8
+  master_port: 29500
diff --git a/src/madengine/configs/launcher/vllm.yaml b/src/madengine/configs/launcher/vllm.yaml
new file mode 100644
index 00000000..cb38b23a
--- /dev/null
+++ b/src/madengine/configs/launcher/vllm.yaml
@@ -0,0 +1,11 @@
+# @package _global_
+distributed:
+  enabled: true
+  launcher: vllm
+  nnodes: 1
+  nproc_per_node: 4
+
+vllm:
+  kv_cache_size: 0.7
+  max_model_len: null
+  tensor_parallel_size: null
diff --git a/src/madengine/configs/platform/bare_metal.yaml b/src/madengine/configs/platform/bare_metal.yaml
new file mode 100644
index 00000000..09825ab0
--- /dev/null
+++ b/src/madengine/configs/platform/bare_metal.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+platform:
+  type: bare_metal
diff --git a/src/madengine/configs/platform/docker.yaml b/src/madengine/configs/platform/docker.yaml
new file mode 100644
index 00000000..cdc555ec
--- /dev/null
+++ b/src/madengine/configs/platform/docker.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+platform:
+  type: docker
diff --git a/src/madengine/configs/platform/podman.yaml b/src/madengine/configs/platform/podman.yaml
new file mode 100644
index 00000000..a1b85147
--- /dev/null
+++ b/src/madengine/configs/platform/podman.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+platform:
+  type: podman
diff --git a/src/madengine/configs/platform/singularity.yaml b/src/madengine/configs/platform/singularity.yaml
new file mode 100644
index 00000000..0cca82a9
--- /dev/null
+++ b/src/madengine/configs/platform/singularity.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+platform:
+  type: singularity
diff --git a/src/madengine/configs/profile/a100_8gpu.yaml b/src/madengine/configs/profile/a100_8gpu.yaml
new file mode 100644
index 00000000..ca5f58a5
--- /dev/null
+++ b/src/madengine/configs/profile/a100_8gpu.yaml
@@ -0,0 +1,18 @@
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+gpu_type: a100
+gpu_memory_gb: 80
+gpus_per_node: 8
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+
+distributed:
+  nproc_per_node: 8
diff --git a/src/madengine/configs/profile/h100_8gpu.yaml b/src/madengine/configs/profile/h100_8gpu.yaml
new file mode 100644
index 00000000..95095f81
--- /dev/null
+++ b/src/madengine/configs/profile/h100_8gpu.yaml
@@ -0,0 +1,18 @@
+# @package _global_
+gpu_vendor: NVIDIA
+guest_os: UBUNTU
+gpu_type: h100
+gpu_memory_gb: 80
+gpus_per_node: 8
+
+runtime:
+  devices: []
+  capabilities: []
+  security_opts: []
+  network_mode: host
+  ipc: host
+  groups: []
+  use_gpu_flag: true
+
+distributed:
+  nproc_per_node: 8
diff --git a/src/madengine/configs/profile/mi250x_4gpu.yaml b/src/madengine/configs/profile/mi250x_4gpu.yaml
new file mode 100644
index 00000000..67a580ae
--- /dev/null
+++ b/src/madengine/configs/profile/mi250x_4gpu.yaml
@@ -0,0 +1,11 @@
+# @package _global_
+gpu_type: mi250x
+gpu_memory_gb: 128
+gpus_per_node: 4
+
+distributed:
+  nproc_per_node: 4
+
+env_vars:
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
diff --git a/src/madengine/configs/profile/mi300x_8gpu.yaml b/src/madengine/configs/profile/mi300x_8gpu.yaml
new file mode 100644
index 00000000..52af4476
--- /dev/null
+++ b/src/madengine/configs/profile/mi300x_8gpu.yaml
@@ -0,0 +1,12 @@
+# @package _global_
+gpu_type: mi300x
+gpu_memory_gb: 192
+gpus_per_node: 8
+
+distributed:
+  nproc_per_node: 8
+
+env_vars:
+  GPU_MAX_HW_QUEUES: "2"
+  HSA_ENABLE_SDMA: "0"
+  HSA_FORCE_FINE_GRAIN_PCIE: "1"
diff --git a/src/madengine/configs/profile/mi300x_single.yaml b/src/madengine/configs/profile/mi300x_single.yaml
new file mode 100644
index 00000000..ee693ac5
--- /dev/null
+++ b/src/madengine/configs/profile/mi300x_single.yaml
@@ -0,0 +1,7 @@
+# @package _global_
+gpu_type: mi300x
+gpu_memory_gb: 192
+gpus_per_node: 1
+
+distributed:
+  nproc_per_node: 1
diff --git a/src/madengine/configs/scheduler/k8s.yaml b/src/madengine/configs/scheduler/k8s.yaml
new file mode 100644
index 00000000..6e946ab2
--- /dev/null
+++ b/src/madengine/configs/scheduler/k8s.yaml
@@ -0,0 +1,31 @@
+# @package _global_
+k8s:
+  kubeconfig: ~/.kube/config
+  namespace: default
+  image_pull_policy: Always
+  backoff_limit: 3
+  ttl_seconds_after_finished: null
+  allow_privileged_profiling: null
+  gpu_count: null
+  gpu_resource_name: amd.com/gpu
+  memory: null
+  memory_limit: null
+  cpu: null
+  cpu_limit: null
+  host_ipc: true
+  node_selector: {}
+  tolerations: []
+  nfs_storage_class: nfs-banff
+  local_path_storage_class: local-path
+  data_storage_class: nfs-banff
+  recreate_shared_data_pvc: false
+  results_pvc: null
+  data_pvc: null
+  output_dir: null
+  secrets:
+    strategy: from_local_credentials
+    image_pull_secret_names: []
+    runtime_secret_name: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
diff --git a/src/madengine/configs/scheduler/local.yaml b/src/madengine/configs/scheduler/local.yaml
new file mode 100644
index 00000000..03bfe3db
--- /dev/null
+++ b/src/madengine/configs/scheduler/local.yaml
@@ -0,0 +1 @@
+# @package _global_
diff --git a/src/madengine/configs/scheduler/slurm.yaml b/src/madengine/configs/scheduler/slurm.yaml
new file mode 100644
index 00000000..ad0d6494
--- /dev/null
+++ b/src/madengine/configs/scheduler/slurm.yaml
@@ -0,0 +1,21 @@
+# @package _global_
+slurm:
+  partition: amd-rccl
+  nodes: 1
+  gpus_per_node: 8
+  time: "24:00:00"
+  output_dir: ./slurm_results
+  exclusive: true
+  modules: []
+  account: null
+  qos: null
+  constraint: null
+  nodelist: null
+  exclude: null
+  results_dir: null
+  shared_workspace: null
+  network_interface: null
+
+env_vars:
+  OMP_NUM_THREADS: "8"
+  MIOPEN_FIND_MODE: "1"
diff --git a/src/madengine/configs/tools/power_profiler.yaml b/src/madengine/configs/tools/power_profiler.yaml
new file mode 100644
index 00000000..61a5d29c
--- /dev/null
+++ b/src/madengine/configs/tools/power_profiler.yaml
@@ -0,0 +1,9 @@
+# @package _global_
+tools:
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+      POWER_MODE: power
+      POWER_DUAL_GCD: "false"
+      POWER_OUTPUT_FILE: gpu_info_power_profiler_output.csv
diff --git a/src/madengine/configs/tools/rocm_trace_lite.yaml b/src/madengine/configs/tools/rocm_trace_lite.yaml
new file mode 100644
index 00000000..7142ed28
--- /dev/null
+++ b/src/madengine/configs/tools/rocm_trace_lite.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+tools:
+  - name: rocm_trace_lite
+    env_vars:
+      RTL_MODE: lite
diff --git a/src/madengine/configs/tools/rocprofv3_comprehensive.yaml b/src/madengine/configs/tools/rocprofv3_comprehensive.yaml
new file mode 100644
index 00000000..001cc4c8
--- /dev/null
+++ b/src/madengine/configs/tools/rocprofv3_comprehensive.yaml
@@ -0,0 +1,17 @@
+# @package _global_
+tools:
+  - name: rocprofv3_full
+    env_vars:
+      RCCL_DEBUG: INFO
+      HSA_ENABLE_SDMA: "0"
+  - name: gpu_info_power_profiler
+    env_vars:
+      POWER_DEVICE: all
+      POWER_SAMPLING_RATE: "0.1"
+      POWER_DUAL_GCD: "false"
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+  - name: miopen_trace
+  - name: rocblas_trace
diff --git a/src/madengine/configs/tools/rocprofv3_lightweight.yaml b/src/madengine/configs/tools/rocprofv3_lightweight.yaml
new file mode 100644
index 00000000..7064316f
--- /dev/null
+++ b/src/madengine/configs/tools/rocprofv3_lightweight.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+tools:
+  - name: rocprofv3_lightweight
diff --git a/src/madengine/configs/tools/vram_profiler.yaml b/src/madengine/configs/tools/vram_profiler.yaml
new file mode 100644
index 00000000..c53c6f70
--- /dev/null
+++ b/src/madengine/configs/tools/vram_profiler.yaml
@@ -0,0 +1,9 @@
+# @package _global_
+tools:
+  - name: gpu_info_vram_profiler
+    env_vars:
+      VRAM_DEVICE: all
+      VRAM_SAMPLING_RATE: "0.1"
+      VRAM_MODE: vram
+      VRAM_DUAL_GCD: "false"
+      VRAM_OUTPUT_FILE: gpu_info_vram_profiler_output.csv
diff --git a/src/madengine/core/auth.py b/src/madengine/core/auth.py
index 15f0a0a6..48e1cd0a 100644
--- a/src/madengine/core/auth.py
+++ b/src/madengine/core/auth.py
@@ -13,11 +13,7 @@
 import shlex
 from typing import Dict, Optional
 
-from madengine.core.errors import (
-    ConfigurationError,
-    create_error_context,
-    handle_error,
-)
+from madengine.core.errors import ConfigurationError, create_error_context, handle_error
 
 
 def load_credentials() -> Optional[Dict]:
@@ -40,7 +36,10 @@ def load_credentials() -> Optional[Dict]:
             with open(credential_file) as f:
                 loaded = json.load(f)
             if not isinstance(loaded, dict):
-                raise ValueError("credential.json must contain a JSON object, not " + type(loaded).__name__)
+                raise ValueError(
+                    "credential.json must contain a JSON object, not "
+                    + type(loaded).__name__
+                )
             credentials = loaded
             print(
                 f"Loaded credentials from {credential_file}: "
@@ -163,7 +162,7 @@ def login_to_registry(
     # Pass the password via an environment variable so it never appears in
     # the process argument list (visible via /proc or ps to other users).
     quoted_username = shlex.quote(username)
-    login_command = "printf %s \"$MAD_REGISTRY_PASSWORD\" | docker login"
+    login_command = 'printf %s "$MAD_REGISTRY_PASSWORD" | docker login'
     if registry and registry.lower() not in ["docker.io", "dockerhub"]:
         login_command += f" {shlex.quote(str(registry))}"
     login_command += f" --username {quoted_username} --password-stdin"
@@ -177,8 +176,6 @@ def login_to_registry(
             f"{registry or 'DockerHub'}[/green]"
         )
     except Exception as e:
-        rich_console.print(
-            f"[red]Failed to login to registry {registry}: {e}[/red]"
-        )
+        rich_console.print(f"[red]Failed to login to registry {registry}: {e}[/red]")
         if raise_on_failure:
             raise
diff --git a/src/madengine/core/console.py b/src/madengine/core/console.py
index 57d7b329..d89488b0 100644
--- a/src/madengine/core/console.py
+++ b/src/madengine/core/console.py
@@ -5,10 +5,11 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+import re
+
 # built-in modules
 import subprocess
 import typing
-import re
 
 
 class Console:
diff --git a/src/madengine/core/constants.py b/src/madengine/core/constants.py
index d1afa4c9..f4cf38cb 100644
--- a/src/madengine/core/constants.py
+++ b/src/madengine/core/constants.py
@@ -22,11 +22,12 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-# built-in modules
-import os
 import json
 import logging
 
+# built-in modules
+import os
+
 
 # Utility function for optional verbose logging of configuration
 def _log_config_info(message: str, force_print: bool = False):
@@ -46,14 +47,14 @@ def _log_config_info(message: str, force_print: bool = False):
 
 def _setup_model_dir():
     """Setup model directory if MODEL_DIR environment variable is set.
-    
+
     MODEL_DIR defaults to "." (current directory) if not set.
     Only copies if MODEL_DIR points to a different directory than current working directory.
     """
     # Get absolute paths to compare
     model_dir_abs = os.path.abspath(MODEL_DIR)
     cwd_abs = os.path.abspath(".")
-    
+
     # Only copy if MODEL_DIR points to a different directory (not current dir)
     if model_dir_abs != cwd_abs:
         # Copy MODEL_DIR to the current working directory.
diff --git a/src/madengine/core/context.py b/src/madengine/core/context.py
index fb934483..dc598291 100644
--- a/src/madengine/core/context.py
+++ b/src/madengine/core/context.py
@@ -23,7 +23,11 @@
 # third-party modules
 from madengine.core.console import Console
 from madengine.utils.rocm_path_resolver import resolve_host_rocm_path
-from madengine.utils.gpu_validator import validate_rocm_installation, GPUInstallationError, GPUVendor
+from madengine.utils.gpu_validator import (
+    validate_rocm_installation,
+    GPUInstallationError,
+    GPUVendor,
+)
 from madengine.utils.gpu_tool_factory import get_gpu_tool_manager
 from madengine.utils.gpu_tool_manager import BaseGPUToolManager
 
@@ -180,10 +184,14 @@ def init_build_context(self, detect_gpu_arch: bool = False) -> None:
 
         # Optionally auto-detect GPU architecture for local full-workflow builds (build+run).
         # Skipped for standalone `madengine build` on non-GPU/CI nodes (detect_gpu_arch=False).
-        if detect_gpu_arch and "MAD_SYSTEM_GPU_ARCHITECTURE" not in self.ctx.get("docker_build_arg", {}):
+        if detect_gpu_arch and "MAD_SYSTEM_GPU_ARCHITECTURE" not in self.ctx.get(
+            "docker_build_arg", {}
+        ):
             try:
                 from madengine.utils.gpu_validator import detect_gpu_vendor
-                from madengine.execution.dockerfile_utils import normalize_architecture_name
+                from madengine.execution.dockerfile_utils import (
+                    normalize_architecture_name,
+                )
 
                 vendor = detect_gpu_vendor(self._rocm_path)
                 if vendor in (GPUVendor.AMD, GPUVendor.NVIDIA):
@@ -193,11 +201,17 @@ def init_build_context(self, detect_gpu_arch: bool = False) -> None:
                     self.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] = arch
                     print(f"Auto-detected GPU architecture for build: {arch}")
                 else:
-                    print("Warning: No supported GPU detected; MAD_SYSTEM_GPU_ARCHITECTURE will not be set automatically.")
-                    print("Consider providing it via --additional-context if needed for build args.")
+                    print(
+                        "Warning: No supported GPU detected; MAD_SYSTEM_GPU_ARCHITECTURE will not be set automatically."
+                    )
+                    print(
+                        "Consider providing it via --additional-context if needed for build args."
+                    )
             except Exception as e:
                 print(f"Warning: Could not auto-detect GPU architecture for build: {e}")
-                print("Consider providing MAD_SYSTEM_GPU_ARCHITECTURE via --additional-context if needed for build args.")
+                print(
+                    "Consider providing MAD_SYSTEM_GPU_ARCHITECTURE via --additional-context if needed for build args."
+                )
 
         # Don't initialize NUMA balancing check for build-only nodes
         # This is runtime-specific and should be handled on execution nodes
@@ -352,10 +366,10 @@ def ensure_system_context(self) -> None:
 
     def _get_tool_manager(self) -> BaseGPUToolManager:
         """Get GPU tool manager for the current vendor (lazy initialization).
-        
+
         Returns:
             GPU tool manager instance
-            
+
         Raises:
             ValueError: If GPU vendor cannot be determined or is unsupported
         """
@@ -371,9 +385,11 @@ def _get_tool_manager(self) -> BaseGPUToolManager:
                     vendor = None  # Auto-detect
             else:
                 vendor = None  # Auto-detect
-            
-            self._gpu_tool_manager = get_gpu_tool_manager(vendor, rocm_path=self._rocm_path)
-        
+
+            self._gpu_tool_manager = get_gpu_tool_manager(
+                vendor, rocm_path=self._rocm_path
+            )
+
         return self._gpu_tool_manager
 
     def get_ctx_test(self) -> str:
@@ -403,19 +419,22 @@ def get_gpu_vendor(self) -> str:
             What types of GPU vendors are supported?
             - NVIDIA
             - AMD
-            
+
         PR #54 Enhancement:
             Added fallback to rocm-smi if amd-smi is missing.
         """
         # Check NVIDIA first (simplest check)
         if os.path.exists("/usr/bin/nvidia-smi"):
             try:
-                result = self.console.sh("/usr/bin/nvidia-smi > /dev/null 2>&1 && echo 'NVIDIA' || echo ''", timeout=180)
+                result = self.console.sh(
+                    "/usr/bin/nvidia-smi > /dev/null 2>&1 && echo 'NVIDIA' || echo ''",
+                    timeout=180,
+                )
                 if result and result.strip() == "NVIDIA":
                     return "NVIDIA"
             except Exception as e:
                 print(f"Warning: nvidia-smi check failed: {e}")
-        
+
         # Check AMD - try amd-smi first, fallback to rocm-smi (PR #54)
         # Use configurable ROCm path (MAD_ROCM_PATH / ROCM_PATH) for non-default installs
         amd_smi_paths = [
@@ -426,22 +445,28 @@ def get_gpu_vendor(self) -> str:
             if os.path.exists(amd_smi_path):
                 try:
                     # Verify amd-smi actually works (180s timeout for slow GPU initialization)
-                    result = self.console.sh(f"{amd_smi_path} list > /dev/null 2>&1 && echo 'AMD' || echo ''", timeout=180)
+                    result = self.console.sh(
+                        f"{amd_smi_path} list > /dev/null 2>&1 && echo 'AMD' || echo ''",
+                        timeout=180,
+                    )
                     if result and result.strip() == "AMD":
                         return "AMD"
                 except Exception as e:
                     print(f"Warning: amd-smi check failed for {amd_smi_path}: {e}")
-        
+
         # Fallback to rocm-smi (PR #54)
         rocm_smi_path = os.path.join(self._rocm_path, "bin", "rocm-smi")
         if os.path.exists(rocm_smi_path):
             try:
-                result = self.console.sh(f"{rocm_smi_path} --showid > /dev/null 2>&1 && echo 'AMD' || echo ''", timeout=180)
+                result = self.console.sh(
+                    f"{rocm_smi_path} --showid > /dev/null 2>&1 && echo 'AMD' || echo ''",
+                    timeout=180,
+                )
                 if result and result.strip() == "AMD":
                     return "AMD"
             except Exception as e:
                 print(f"Warning: rocm-smi check failed: {e}")
-        
+
         return "Unable to detect GPU vendor"
 
     def get_host_os(self) -> str:
@@ -500,20 +525,19 @@ def get_system_ngpus(self) -> int:
             What types of GPU vendors are supported?
             - NVIDIA
             - AMD
-            
+
         Enhancement:
             Uses version-aware tool manager with automatic fallback (PR #54).
         """
         vendor = self.ctx["docker_env_vars"]["MAD_GPU_VENDOR"]
-        
+
         if vendor == "AMD":
             try:
                 tool_manager = self._get_tool_manager()
                 return tool_manager.get_gpu_count()
             except Exception as e:
                 raise RuntimeError(
-                    f"Unable to determine number of AMD GPUs. "
-                    f"Error: {e}"
+                    f"Unable to determine number of AMD GPUs. " f"Error: {e}"
                 )
         elif vendor == "NVIDIA":
             try:
@@ -522,12 +546,13 @@ def get_system_ngpus(self) -> int:
             except Exception as e:
                 # Fallback to direct command for NVIDIA (longer timeout for slow compute nodes)
                 try:
-                    number_gpus = int(self.console.sh("nvidia-smi -L | wc -l", timeout=180))
+                    number_gpus = int(
+                        self.console.sh("nvidia-smi -L | wc -l", timeout=180)
+                    )
                     return number_gpus
                 except Exception:
                     raise RuntimeError(
-                        f"Unable to determine number of NVIDIA GPUs. "
-                        f"Error: {e}"
+                        f"Unable to determine number of NVIDIA GPUs. " f"Error: {e}"
                     )
         else:
             raise RuntimeError(f"Unable to determine gpu vendor: {vendor}")
@@ -569,32 +594,31 @@ def get_system_gpu_architecture(self) -> str:
 
     def get_system_gpu_product_name(self) -> str:
         """Get system GPU product name with fallback (PR #54).
-        
+
         Returns:
             str: The GPU product name (e.g., AMD Instinct MI300X, NVIDIA H100 80GB HBM3).
-        
+
         Raises:
             RuntimeError: If the GPU vendor is not detected.
             RuntimeError: If the GPU product name is unable to determine.
-        
+
         Note:
             What types of GPU vendors are supported?
             - NVIDIA
             - AMD
-            
+
         PR #54 Enhancement:
             Added rocm-smi fallback for AMD GPUs when amd-smi unavailable.
         """
         vendor = self.ctx["docker_env_vars"]["MAD_GPU_VENDOR"]
-        
+
         if vendor == "AMD":
             try:
                 tool_manager = self._get_tool_manager()
                 return tool_manager.get_gpu_product_name(gpu_id=0)
             except Exception as e:
                 raise RuntimeError(
-                    f"Unable to determine AMD GPU product name. "
-                    f"Error: {e}"
+                    f"Unable to determine AMD GPU product name. " f"Error: {e}"
                 )
         elif vendor == "NVIDIA":
             try:
@@ -603,58 +627,66 @@ def get_system_gpu_product_name(self) -> str:
             except Exception as e:
                 # Fallback to direct command for NVIDIA (longer timeout for slow compute nodes)
                 try:
-                    return self.console.sh("nvidia-smi --query-gpu=name --format=csv,noheader,nounits -i 0", timeout=180)
+                    return self.console.sh(
+                        "nvidia-smi --query-gpu=name --format=csv,noheader,nounits -i 0",
+                        timeout=180,
+                    )
                 except Exception:
                     raise RuntimeError(
-                        f"Unable to determine NVIDIA GPU product name. "
-                        f"Error: {e}"
+                        f"Unable to determine NVIDIA GPU product name. " f"Error: {e}"
                     )
         else:
-            raise RuntimeError(f"Unable to determine gpu product name for vendor: {vendor}")
+            raise RuntimeError(
+                f"Unable to determine gpu product name for vendor: {vendor}"
+            )
 
     def get_system_hip_version(self):
         """Get HIP/CUDA version using tool manager.
-        
+
         Returns:
             str: Version string (e.g., "6.4" for ROCm, "12.0" for CUDA)
-            
+
         Raises:
             RuntimeError: If version cannot be determined
-            
+
         Enhancement:
             Uses tool manager for robust version detection with multiple fallbacks.
         """
-        vendor = self.ctx['docker_env_vars']['MAD_GPU_VENDOR']
-        
-        if vendor == 'AMD':
+        vendor = self.ctx["docker_env_vars"]["MAD_GPU_VENDOR"]
+
+        if vendor == "AMD":
             try:
                 tool_manager = self._get_tool_manager()
                 version_str = tool_manager.get_version()
                 if version_str:
                     # Return major.minor only (e.g., "6.4.1" -> "6.4")
-                    parts = version_str.split('.')
+                    parts = version_str.split(".")
                     if len(parts) >= 2:
                         return f"{parts[0]}.{parts[1]}"
                     return version_str
-                
+
                 # Fallback to hipconfig if tool manager fails
                 version = self.console.sh("hipconfig --version | cut -d'.' -f1,2")
                 if not version or version.strip() == "":
                     raise RuntimeError("hipconfig returned empty version")
                 return version
-                
+
             except Exception as e:
                 raise RuntimeError(
                     f"Unable to determine HIP version. "
                     f"Ensure ROCm is installed and hipconfig is accessible. "
                     f"Error: {e}"
                 )
-        elif vendor == 'NVIDIA':
+        elif vendor == "NVIDIA":
             try:
                 tool_manager = self._get_tool_manager()
-                return tool_manager.get_version() or self.console.sh("nvcc --version | sed -n 's/^.*release \\([0-9]\\+\\.[0-9]\\+\\).*$/\\1/p'")
+                return tool_manager.get_version() or self.console.sh(
+                    "nvcc --version | sed -n 's/^.*release \\([0-9]\\+\\.[0-9]\\+\\).*$/\\1/p'"
+                )
             except Exception:
-                return self.console.sh("nvcc --version | sed -n 's/^.*release \\([0-9]\\+\\.[0-9]\\+\\).*$/\\1/p'")
+                return self.console.sh(
+                    "nvcc --version | sed -n 's/^.*release \\([0-9]\\+\\.[0-9]\\+\\).*$/\\1/p'"
+                )
         else:
             raise RuntimeError(f"Unable to determine hip version for vendor: {vendor}")
 
@@ -692,11 +724,11 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
         """
         # Initialize the GPU renderD nodes.
         gpu_renderDs = None
-        
+
         # Check if the GPU vendor is AMD.
-        if self.ctx['docker_env_vars']['MAD_GPU_VENDOR'] != 'AMD':
+        if self.ctx["docker_env_vars"]["MAD_GPU_VENDOR"] != "AMD":
             return gpu_renderDs
-            
+
         try:
             # Get ROCm version using tool manager for robust detection (PR #54)
             try:
@@ -707,45 +739,62 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
             except Exception as e:
                 # Fallback to direct file read
                 version_file = os.path.join(self._rocm_path, ".info", "version")
-                rocm_version_str = self.console.sh(f"cat {version_file} | cut -d'-' -f1")
+                rocm_version_str = self.console.sh(
+                    f"cat {version_file} | cut -d'-' -f1"
+                )
                 if not rocm_version_str or rocm_version_str.strip() == "":
-                    raise RuntimeError(f"Failed to retrieve ROCm version from {version_file}")
-                
+                    raise RuntimeError(
+                        f"Failed to retrieve ROCm version from {version_file}"
+                    )
+
                 # Parse version safely
                 try:
                     rocm_version = tuple(map(int, rocm_version_str.strip().split(".")))
                 except (ValueError, AttributeError) as parse_err:
-                    raise RuntimeError(f"Failed to parse ROCm version '{rocm_version_str}': {parse_err}")
-            
+                    raise RuntimeError(
+                        f"Failed to parse ROCm version '{rocm_version_str}': {parse_err}"
+                    )
+
             # Get renderDs from KFD properties
             # Try KFD topology first (preferred), but gracefully handle permission errors
             # On HPC/multi-user systems, KFD topology files may be restricted
             kfd_renderDs = None
             kfd_properties = []
             try:
-                kfd_output = self.console.sh("grep -r drm_render_minor /sys/devices/virtual/kfd/kfd/topology/nodes")
+                kfd_output = self.console.sh(
+                    "grep -r drm_render_minor /sys/devices/virtual/kfd/kfd/topology/nodes"
+                )
                 if kfd_output and kfd_output.strip():
                     kfd_properties = kfd_output.split("\n")
                     # Filter out empty lines and CPU entries (renderD value 0)
                     kfd_properties = [
-                        line for line in kfd_properties 
+                        line
+                        for line in kfd_properties
                         if line.strip() and line.split() and int(line.split()[-1]) != 0
                     ]
                     if kfd_properties:
-                        kfd_renderDs = [int(line.split()[-1]) for line in kfd_properties]
+                        kfd_renderDs = [
+                            int(line.split()[-1]) for line in kfd_properties
+                        ]
             except Exception as kfd_error:
                 # KFD topology read failed (common on HPC clusters with restricted permissions)
                 # Will use amd-smi/rocm-smi fallback which provides renderD info directly
-                print(f"Note: KFD topology not accessible ({kfd_error}), using ROCm tools fallback")
+                print(
+                    f"Note: KFD topology not accessible ({kfd_error}), using ROCm tools fallback"
+                )
 
             # Get gpu id - renderD mapping using unique id if ROCm < 6.4.1 and node id otherwise
             # node id is more robust but is only available from 6.4.1 (PR #54)
             if rocm_version < (6, 4, 1):
                 # Legacy method using unique_id
-                kfd_unique_output = self.console.sh("grep -r unique_id /sys/devices/virtual/kfd/kfd/topology/nodes")
+                kfd_unique_output = self.console.sh(
+                    "grep -r unique_id /sys/devices/virtual/kfd/kfd/topology/nodes"
+                )
                 if not kfd_unique_output:
-                    raise RuntimeError("Failed to retrieve unique_id from KFD properties")
-                
+                    raise RuntimeError(
+                        "Failed to retrieve unique_id from KFD properties"
+                    )
+
                 kfd_unique_ids_raw = kfd_unique_output.split("\n")
                 # Convert unique_ids to hex, filtering empty lines
                 kfd_unique_ids = []
@@ -755,7 +804,9 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
                             unique_id_int = int(item.split()[-1])
                             kfd_unique_ids.append(hex(unique_id_int))
                         except (ValueError, IndexError) as e:
-                            print(f"Warning: Failed to parse unique_id from line '{item}': {e}")
+                            print(
+                                f"Warning: Failed to parse unique_id from line '{item}': {e}"
+                            )
                             continue
 
                 if len(kfd_unique_ids) != len(kfd_renderDs):
@@ -766,54 +817,67 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
 
                 # Map unique ids to renderDs
                 uniqueid_renderD_map = {
-                    unique_id: renderD 
+                    unique_id: renderD
                     for unique_id, renderD in zip(kfd_unique_ids, kfd_renderDs)
                 }
 
                 # Get GPU ID to unique ID mapping from rocm-smi (longer timeout for slow compute nodes)
-                rsmi_output = self.console.sh("rocm-smi --showuniqueid | grep 'Unique.*:'", timeout=180)
+                rsmi_output = self.console.sh(
+                    "rocm-smi --showuniqueid | grep 'Unique.*:'", timeout=180
+                )
                 if not rsmi_output or rsmi_output.strip() == "":
                     raise RuntimeError("Failed to retrieve unique IDs from rocm-smi")
-                
-                rsmi_lines = [line.strip() for line in rsmi_output.split("\n") if line.strip()]
-                
+
+                rsmi_lines = [
+                    line.strip() for line in rsmi_output.split("\n") if line.strip()
+                ]
+
                 # Sort gpu_renderDs based on GPU IDs
                 gpu_renderDs = []
                 for line in rsmi_lines:
                     try:
                         unique_id = line.split()[-1]
                         if unique_id not in uniqueid_renderD_map:
-                            raise KeyError(f"Unique ID '{unique_id}' from rocm-smi not found in KFD mapping")
+                            raise KeyError(
+                                f"Unique ID '{unique_id}' from rocm-smi not found in KFD mapping"
+                            )
                         gpu_renderDs.append(uniqueid_renderD_map[unique_id])
                     except (IndexError, KeyError) as e:
-                        raise RuntimeError(f"Failed to map unique ID from line '{line}': {e}")
+                        raise RuntimeError(
+                            f"Failed to map unique ID from line '{line}': {e}"
+                        )
             else:
                 # Modern method using amd-smi (ROCm >= 6.4.0)
                 # Get list of GPUs from amd-smi (redirect stderr to filter warnings)
                 # Longer timeout (180s) for slow GPU initialization on SLURM compute nodes
-                output = self.console.sh("amd-smi list -e --json 2>/dev/null || amd-smi list -e --json 2>&1", timeout=180)
+                output = self.console.sh(
+                    "amd-smi list -e --json 2>/dev/null || amd-smi list -e --json 2>&1",
+                    timeout=180,
+                )
                 if not output or output.strip() == "":
                     raise ValueError("Failed to retrieve AMD GPU data from amd-smi")
-                
+
                 # amd-smi may output warnings before JSON - extract only JSON part
                 # Look for lines starting with '[' or '{' (JSON start)
                 json_start = -1
-                lines = output.split('\n')
+                lines = output.split("\n")
                 for i, line in enumerate(lines):
-                    if line.strip().startswith('[') or line.strip().startswith('{'):
+                    if line.strip().startswith("[") or line.strip().startswith("{"):
                         json_start = i
                         break
-                
+
                 if json_start >= 0:
-                    json_output = '\n'.join(lines[json_start:])
+                    json_output = "\n".join(lines[json_start:])
                 else:
                     json_output = output
-                
+
                 try:
                     data = json.loads(json_output)
                 except json.JSONDecodeError as e:
-                    raise ValueError(f"Failed to parse amd-smi JSON output: {e}. Output was: {output[:200]}")
-                
+                    raise ValueError(
+                        f"Failed to parse amd-smi JSON output: {e}. Output was: {output[:200]}"
+                    )
+
                 if not data or not isinstance(data, list):
                     raise ValueError("amd-smi returned empty or invalid data")
 
@@ -827,9 +891,13 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
                             if match:
                                 kfd_nodeids.append(int(match.group()))
                             else:
-                                print(f"Warning: Could not extract node ID from line: {line}")
+                                print(
+                                    f"Warning: Could not extract node ID from line: {line}"
+                                )
                         except (IndexError, ValueError) as e:
-                            print(f"Warning: Failed to parse node ID from line '{line}': {e}")
+                            print(
+                                f"Warning: Failed to parse node ID from line '{line}': {e}"
+                            )
                             continue
 
                     if len(kfd_nodeids) != len(kfd_renderDs):
@@ -840,7 +908,7 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
 
                     # Map node ids to renderDs
                     nodeid_renderD_map = {
-                        nodeid: renderD 
+                        nodeid: renderD
                         for nodeid, renderD in zip(kfd_nodeids, kfd_renderDs)
                     }
 
@@ -850,12 +918,14 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
                         try:
                             gpuid_nodeid_map[item["gpu"]] = item["node_id"]
                         except KeyError as e:
-                            raise KeyError(f"Failed to parse node_id from amd-smi data: {e}. Item: {item}")
+                            raise KeyError(
+                                f"Failed to parse node_id from amd-smi data: {e}. Item: {item}"
+                            )
 
                     # Sort gpu_renderDs based on gpu ids
                     try:
                         gpu_renderDs = [
-                            nodeid_renderD_map[gpuid_nodeid_map[gpuid]] 
+                            nodeid_renderD_map[gpuid_nodeid_map[gpuid]]
                             for gpuid in sorted(gpuid_nodeid_map.keys())
                         ]
                     except KeyError as e:
@@ -871,7 +941,9 @@ def get_gpu_renderD_nodes(self) -> typing.Optional[typing.List[int]]:
                             render_num = int(render_str.replace("renderD", ""))
                             gpu_renderDs.append(render_num)
                         except (KeyError, ValueError) as e:
-                            raise RuntimeError(f"Failed to parse renderD from amd-smi: {e}. Item: {item}")
+                            raise RuntimeError(
+                                f"Failed to parse renderD from amd-smi: {e}. Item: {item}"
+                            )
 
         except (RuntimeError, ValueError, KeyError) as e:
             # Re-raise with context
diff --git a/src/madengine/core/dataprovider.py b/src/madengine/core/dataprovider.py
index 809c4425..62914683 100644
--- a/src/madengine/core/dataprovider.py
+++ b/src/madengine/core/dataprovider.py
@@ -26,9 +26,9 @@
 
 # madengine modules
 from madengine.core.console import Console
+from madengine.core.constants import MAD_AWS_S3, MAD_MINIO, NAS_NODES
 from madengine.core.context import Context
 from madengine.core.docker import Docker
-from madengine.core.constants import NAS_NODES, MAD_AWS_S3, MAD_MINIO
 
 
 class DataSourceException(Exception):
@@ -313,11 +313,11 @@ def prepare_data(self, model_docker):
         if "mirrorlocal" in self.config:
             # copy data from NAS locally
             cmd = """
-                if [ -f \"$(which apt)\" ]; then 
+                if [ -f \"$(which apt)\" ]; then
                     apt update && apt install -y sshpass sshfs rsync
-                elif [ -f \"$(which yum)\" ]; then 
+                elif [ -f \"$(which yum)\" ]; then
                     yum install -y sshpass rsync
-                else 
+                else
                     echo 'Unable to detect Host OS'
                     exit 1
                 fi
@@ -350,11 +350,11 @@ def prepare_data(self, model_docker):
             print("Data Download Duration: {} seconds".format(self.duration))
         else:
             cmd = """
-                if [ -f \"$(which apt)\" ]; then 
+                if [ -f \"$(which apt)\" ]; then
                     apt update && apt install -y sshpass sshfs
-                elif [ -f \"$(which yum)\" ]; then 
+                elif [ -f \"$(which yum)\" ]; then
                     yum install -y sshpass sshfs
-                else 
+                else
                     echo 'Unable to detect Host OS'
                     exit 1
                 fi
diff --git a/src/madengine/core/docker.py b/src/madengine/core/docker.py
index 115b9448..c15d5eff 100644
--- a/src/madengine/core/docker.py
+++ b/src/madengine/core/docker.py
@@ -68,17 +68,13 @@ def __init__(
         )
         # if container name exists, clean it up automatically
         if container_name_exists:
-            print(
-                f"⚠️  Container '{container_name}' already exists. Cleaning up..."
-            )
+            print(f"⚠️  Container '{container_name}' already exists. Cleaning up...")
             # Stop the container (with timeout)
             self.console.sh(
                 f"docker stop -t 1 {container_name_quoted} 2>/dev/null || true"
             )
             # Remove the container
-            self.console.sh(
-                f"docker rm -f {container_name_quoted} 2>/dev/null || true"
-            )
+            self.console.sh(f"docker rm -f {container_name_quoted} 2>/dev/null || true")
             print(f"✓ Cleaned up existing container '{container_name}'")
 
         # run docker command
@@ -107,7 +103,7 @@ def __init__(
         command += "--workdir /myworkspace/ "
         command += "--name " + container_name + " "
         command += image + " "
-        
+
         # Use 'cat' to keep container alive (blocks waiting for stdin)
         # Works reliably across all deployment types (local, k8s, slurm)
         # with fresh image pulls preventing corrupted layer issues
@@ -116,9 +112,7 @@ def __init__(
 
         # find container sha — use the same exact-match filter as the existence
         # check above to avoid false positives from substring/regex matches.
-        self.docker_sha = self.console.sh(
-            f"docker ps -aqf name={container_name_regex}"
-        )
+        self.docker_sha = self.console.sh(f"docker ps -aqf name={container_name_regex}")
 
     def sh(self, command: str, timeout: int = 60, secret: bool = False) -> str:
         """Run shell command inside docker.
diff --git a/src/madengine/core/errors.py b/src/madengine/core/errors.py
index 6a0757ab..168a0306 100644
--- a/src/madengine/core/errors.py
+++ b/src/madengine/core/errors.py
@@ -8,15 +8,17 @@
 
 import logging
 from dataclasses import dataclass
-from typing import Optional, Any, Dict, List
 from enum import Enum
+from typing import Any, Dict, List, Optional
 
 try:
     from rich.console import Console
     from rich.panel import Panel
     from rich.text import Text
 except ImportError:
-    raise ImportError("Rich is required for error handling. Install with: pip install rich")
+    raise ImportError(
+        "Rich is required for error handling. Install with: pip install rich"
+    )
 
 
 class ErrorCategory(Enum):
@@ -37,7 +39,7 @@ class ErrorCategory(Enum):
 @dataclass
 class ErrorContext:
     """Context information for errors."""
-    
+
     operation: str
     phase: Optional[str] = None
     component: Optional[str] = None
@@ -49,7 +51,7 @@ class ErrorContext:
 
 class MADEngineError(Exception):
     """Base exception for all madengine errors."""
-    
+
     def __init__(
         self,
         message: str,
@@ -57,7 +59,7 @@ def __init__(
         context: Optional[ErrorContext] = None,
         cause: Optional[Exception] = None,
         recoverable: bool = False,
-        suggestions: Optional[List[str]] = None
+        suggestions: Optional[List[str]] = None,
     ):
         super().__init__(message)
         self.message = message
@@ -73,11 +75,7 @@ class ValidationError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.VALIDATION,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.VALIDATION, context, recoverable=True, **kwargs
         )
 
 
@@ -86,11 +84,7 @@ class NetworkError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.CONNECTION,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.CONNECTION, context, recoverable=True, **kwargs
         )
 
 
@@ -99,11 +93,7 @@ class AuthenticationError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.AUTHENTICATION,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.AUTHENTICATION, context, recoverable=True, **kwargs
         )
 
 
@@ -112,37 +102,25 @@ class ExecutionError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.RUNTIME,
-            context,
-            recoverable=False,
-            **kwargs
+            message, ErrorCategory.RUNTIME, context, recoverable=False, **kwargs
         )
 
 
 class BuildError(MADEngineError):
     """Build and compilation errors."""
-    
+
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message, 
-            ErrorCategory.BUILD, 
-            context, 
-            recoverable=False,
-            **kwargs
+            message, ErrorCategory.BUILD, context, recoverable=False, **kwargs
         )
 
 
 class DiscoveryError(MADEngineError):
     """Model discovery errors."""
-    
+
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message, 
-            ErrorCategory.DISCOVERY, 
-            context, 
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.DISCOVERY, context, recoverable=True, **kwargs
         )
 
 
@@ -151,11 +129,7 @@ class OrchestrationError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.ORCHESTRATION,
-            context,
-            recoverable=False,
-            **kwargs
+            message, ErrorCategory.ORCHESTRATION, context, recoverable=False, **kwargs
         )
 
 
@@ -164,11 +138,7 @@ class RunnerError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.RUNNER,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.RUNNER, context, recoverable=True, **kwargs
         )
 
 
@@ -177,11 +147,7 @@ class ConfigurationError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.CONFIGURATION,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.CONFIGURATION, context, recoverable=True, **kwargs
         )
 
 
@@ -190,40 +156,38 @@ class DeploymentTimeoutError(MADEngineError):
 
     def __init__(self, message: str, context: Optional[ErrorContext] = None, **kwargs):
         super().__init__(
-            message,
-            ErrorCategory.TIMEOUT,
-            context,
-            recoverable=True,
-            **kwargs
+            message, ErrorCategory.TIMEOUT, context, recoverable=True, **kwargs
         )
 
 
 class ErrorHandler:
     """Unified error handler with Rich console integration."""
-    
+
     def __init__(self, console: Optional[Console] = None, verbose: bool = False):
         self.console = console or Console()
         self.verbose = verbose
         self.logger = logging.getLogger(__name__)
-    
+
     def handle_error(
-        self, 
-        error: Exception, 
+        self,
+        error: Exception,
         context: Optional[ErrorContext] = None,
-        show_traceback: Optional[bool] = None
+        show_traceback: Optional[bool] = None,
     ) -> None:
         """Handle and display errors with rich formatting."""
-        
+
         show_tb = show_traceback if show_traceback is not None else self.verbose
-        
+
         if isinstance(error, MADEngineError):
             self._handle_madengine_error(error, show_tb)
         else:
             self._handle_generic_error(error, context, show_tb)
-    
-    def _handle_madengine_error(self, error: MADEngineError, show_traceback: bool) -> None:
+
+    def _handle_madengine_error(
+        self, error: MADEngineError, show_traceback: bool
+    ) -> None:
         """Handle madengine structured errors."""
-        
+
         # Determine error emoji and color
         category_info = {
             ErrorCategory.VALIDATION: ("⚠️", "yellow"),
@@ -237,16 +201,16 @@ def _handle_madengine_error(self, error: MADEngineError, show_traceback: bool) -
             ErrorCategory.CONFIGURATION: ("⚙️", "yellow"),
             ErrorCategory.TIMEOUT: ("⏱️", "yellow"),
         }
-        
+
         emoji, color = category_info.get(error.category, ("❌", "red"))
-        
+
         # Create error panel
         title = f"{emoji} {error.category.value.title()} Error"
-        
+
         # Build error content
         content = Text()
         content.append(f"{error.message}\n", style=f"bold {color}")
-        
+
         # Add context information
         if error.context:
             content.append("\n📋 Context:\n", style="bold cyan")
@@ -262,58 +226,50 @@ def _handle_madengine_error(self, error: MADEngineError, show_traceback: bool) -
                 content.append(f"  Node: {error.context.node_id}\n")
             if error.context.file_path:
                 content.append(f"  File: {error.context.file_path}\n")
-        
+
         # Add cause information
         if error.cause:
             content.append(f"\n🔗 Caused by: {str(error.cause)}\n", style="dim")
-        
+
         # Add suggestions
         if error.suggestions:
             content.append("\n💡 Suggestions:\n", style="bold green")
             for suggestion in error.suggestions:
                 content.append(f"  • {suggestion}\n", style="green")
-        
+
         # Add recovery information
         if error.recoverable:
             content.append("\n♻️  This error may be recoverable", style="bold blue")
-        
-        panel = Panel(
-            content,
-            title=title,
-            border_style=color,
-            expand=False
-        )
-        
+
+        panel = Panel(content, title=title, border_style=color, expand=False)
+
         self.console.print(panel)
-        
+
         # Show traceback if requested
         if show_traceback and error.cause:
             self.console.print("\n📚 [bold]Full Traceback:[/bold]")
             self.console.print_exception()
-        
+
         # Log to file
         self.logger.error(
             f"{error.category.value}: {error.message}",
             extra={
                 "context": error.context.__dict__ if error.context else {},
                 "recoverable": error.recoverable,
-                "suggestions": error.suggestions
-            }
+                "suggestions": error.suggestions,
+            },
         )
-    
+
     def _handle_generic_error(
-        self, 
-        error: Exception, 
-        context: Optional[ErrorContext], 
-        show_traceback: bool
+        self, error: Exception, context: Optional[ErrorContext], show_traceback: bool
     ) -> None:
         """Handle generic Python exceptions."""
-        
+
         title = f"❌ {type(error).__name__}"
-        
+
         content = Text()
         content.append(f"{str(error)}\n", style="bold red")
-        
+
         if context:
             content.append("\n📋 Context:\n", style="bold cyan")
             content.append(f"  Operation: {context.operation}\n")
@@ -321,20 +277,15 @@ def _handle_generic_error(
                 content.append(f"  Phase: {context.phase}\n")
             if context.component:
                 content.append(f"  Component: {context.component}\n")
-        
-        panel = Panel(
-            content,
-            title=title,
-            border_style="red",
-            expand=False
-        )
-        
+
+        panel = Panel(content, title=title, border_style="red", expand=False)
+
         self.console.print(panel)
-        
+
         if show_traceback:
             self.console.print("\n📚 [bold]Full Traceback:[/bold]")
             self.console.print_exception()
-        
+
         # Log to file
         self.logger.error(f"{type(error).__name__}: {str(error)}")
 
@@ -355,9 +306,9 @@ def get_error_handler() -> Optional[ErrorHandler]:
 
 
 def handle_error(
-    error: Exception, 
+    error: Exception,
     context: Optional[ErrorContext] = None,
-    show_traceback: Optional[bool] = None
+    show_traceback: Optional[bool] = None,
 ) -> None:
     """Handle error using the global error handler."""
     if _global_error_handler:
@@ -373,12 +324,7 @@ def create_error_context(
     operation: str,
     phase: Optional[str] = None,
     component: Optional[str] = None,
-    **kwargs
+    **kwargs,
 ) -> ErrorContext:
     """Convenience function to create error context."""
-    return ErrorContext(
-        operation=operation,
-        phase=phase,
-        component=component,
-        **kwargs
-    )
+    return ErrorContext(operation=operation, phase=phase, component=component, **kwargs)
diff --git a/src/madengine/database/__init__.py b/src/madengine/database/__init__.py
index 89c630c0..141bb03b 100644
--- a/src/madengine/database/__init__.py
+++ b/src/madengine/database/__init__.py
@@ -6,12 +6,12 @@
 """
 
 from .mongodb import (
-    MongoDBHandler,
-    upload_csv_to_mongodb,
-    upload_file_to_mongodb,
     MongoDBConfig,
+    MongoDBHandler,
     UploadOptions,
     UploadResult,
+    upload_csv_to_mongodb,
+    upload_file_to_mongodb,
 )
 
 __all__ = [
@@ -22,4 +22,3 @@
     "UploadOptions",
     "UploadResult",
 ]
-
diff --git a/src/madengine/database/mongodb.py b/src/madengine/database/mongodb.py
index 7713e991..e0d4bb48 100644
--- a/src/madengine/database/mongodb.py
+++ b/src/madengine/database/mongodb.py
@@ -21,7 +21,13 @@
 from pymongo import UpdateOne
 from pymongo.errors import BulkWriteError, ConnectionFailure, PyMongoError
 from rich.console import Console
-from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
+from rich.progress import (
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    BarColumn,
+    TaskProgressColumn,
+)
 
 logger = logging.getLogger(__name__)
 console = Console()
@@ -31,62 +37,66 @@
 # Configuration
 # ============================================================================
 
+
 @dataclass
 class MongoDBConfig:
     """MongoDB connection configuration."""
-    
+
     host: str = "localhost"
     port: int = 27017
     username: str = ""
     password: str = ""
     auth_source: str = "admin"
     timeout_ms: int = 5000
-    
+
     @classmethod
-    def from_env(cls) -> 'MongoDBConfig':
+    def from_env(cls) -> "MongoDBConfig":
         """Load configuration from environment variables."""
         import os
+
         return cls(
             host=os.getenv("MONGO_HOST", "localhost"),
             port=int(os.getenv("MONGO_PORT", "27017")),
             username=os.getenv("MONGO_USER", ""),
             password=os.getenv("MONGO_PASSWORD", ""),
             auth_source=os.getenv("MONGO_AUTH_SOURCE", "admin"),
-            timeout_ms=int(os.getenv("MONGO_TIMEOUT_MS", "5000"))
+            timeout_ms=int(os.getenv("MONGO_TIMEOUT_MS", "5000")),
         )
-    
+
     @property
     def uri(self) -> str:
         """Build MongoDB connection URI."""
         if self.username and self.password:
-            return (f"mongodb://{self.username}:{self.password}@"
-                   f"{self.host}:{self.port}/{self.auth_source}")
+            return (
+                f"mongodb://{self.username}:{self.password}@"
+                f"{self.host}:{self.port}/{self.auth_source}"
+            )
         return f"mongodb://{self.host}:{self.port}"
 
 
 @dataclass
 class UploadOptions:
     """Options for document upload."""
-    
+
     # Deduplication strategy
     unique_fields: Optional[List[str]] = None  # Fields to use for uniqueness
     upsert: bool = True  # Update existing or insert only
-    
+
     # Performance options
     batch_size: int = 1000  # Documents per batch
     ordered: bool = False  # Continue on error
-    
+
     # Index creation
     create_indexes: bool = True
     index_fields: Optional[List[str]] = None  # Auto-detect if None
-    
+
     # Metadata
     add_metadata: bool = True
     metadata_prefix: str = "_meta"
-    
+
     # Validation
     validate_schema: bool = True
-    
+
     # Dry run
     dry_run: bool = False
 
@@ -94,7 +104,7 @@ class UploadOptions:
 @dataclass
 class UploadResult:
     """Result of upload operation."""
-    
+
     status: str  # success, partial, failed
     documents_read: int
     documents_processed: int
@@ -103,7 +113,7 @@ class UploadResult:
     documents_failed: int
     errors: List[str] = field(default_factory=list)
     duration_seconds: float = 0.0
-    
+
     def print_summary(self):
         """Print formatted summary."""
         if self.status == "success":
@@ -112,7 +122,7 @@ def print_summary(self):
             console.print(f"⚠️  [bold yellow]Partial success[/bold yellow]")
         else:
             console.print(f"❌ [bold red]Upload failed[/bold red]")
-        
+
         console.print(f"   📊 Documents read: {self.documents_read}")
         console.print(f"   ✨ Documents processed: {self.documents_processed}")
         console.print(f"   ➕ Inserted: {self.documents_inserted}")
@@ -126,20 +136,22 @@ def print_summary(self):
 # File Loaders (Strategy Pattern)
 # ============================================================================
 
+
 class FileFormat(Enum):
     """Supported file formats."""
+
     CSV = "csv"
     JSON = "json"
 
 
 class DocumentLoader(ABC):
     """Abstract base class for document loaders."""
-    
+
     @abstractmethod
     def load(self, file_path: Path) -> List[Dict[str, Any]]:
         """Load documents from file."""
         pass
-    
+
     @abstractmethod
     def infer_schema(self, documents: List[Dict[str, Any]]) -> Dict[str, type]:
         """Infer schema from documents."""
@@ -148,14 +160,14 @@ def infer_schema(self, documents: List[Dict[str, Any]]) -> Dict[str, type]:
 
 class JSONLoader(DocumentLoader):
     """Loader for JSON files with native type preservation."""
-    
+
     def load(self, file_path: Path) -> List[Dict[str, Any]]:
         """Load JSON file preserving native types."""
         logger.info(f"Loading JSON file: {file_path}")
-        
-        with open(file_path, 'r') as f:
+
+        with open(file_path, "r") as f:
             data = json.load(f)
-        
+
         # Normalize to list
         if isinstance(data, dict):
             documents = [data]
@@ -163,42 +175,42 @@ def load(self, file_path: Path) -> List[Dict[str, Any]]:
             documents = data
         else:
             raise ValueError(f"Expected JSON object or array, got {type(data)}")
-        
+
         # Validate structure
         for i, doc in enumerate(documents):
             if not isinstance(doc, dict):
                 raise ValueError(f"Document {i} is not a JSON object: {type(doc)}")
-        
+
         logger.info(f"Loaded {len(documents)} documents from JSON")
         return documents
-    
+
     def infer_schema(self, documents: List[Dict[str, Any]]) -> Dict[str, type]:
         """Infer schema from JSON documents."""
         if not documents:
             return {}
-        
+
         schema = {}
         sample_doc = documents[0]
-        
+
         for key, value in sample_doc.items():
             schema[key] = type(value)
-        
+
         return schema
 
 
 class CSVLoader(DocumentLoader):
     """Loader for CSV files with intelligent type inference."""
-    
+
     def load(self, file_path: Path) -> List[Dict[str, Any]]:
         """Load CSV file with type inference."""
         logger.info(f"Loading CSV file: {file_path}")
-        
+
         # Read CSV with pandas (intelligent type inference)
         df = pd.read_csv(file_path)
-        
+
         # Clean column names
         df.columns = df.columns.str.strip()
-        
+
         # Convert to documents with native types preserved
         documents = []
         for _, row in df.iterrows():
@@ -209,7 +221,7 @@ def load(self, file_path: Path) -> List[Dict[str, Any]]:
                 if pd.isna(value):
                     doc[col] = None
                 # Try to parse JSON strings (for configs, multi_results)
-                elif isinstance(value, str) and value.strip().startswith(('{', '[')):
+                elif isinstance(value, str) and value.strip().startswith(("{", "[")):
                     try:
                         doc[col] = json.loads(value)
                     except json.JSONDecodeError:
@@ -217,44 +229,44 @@ def load(self, file_path: Path) -> List[Dict[str, Any]]:
                 else:
                     # Keep native type (int, float, bool, str)
                     doc[col] = value if not pd.isna(value) else None
-            
+
             documents.append(doc)
-        
+
         logger.info(f"Loaded {len(documents)} documents from CSV")
         return documents
-    
+
     def infer_schema(self, documents: List[Dict[str, Any]]) -> Dict[str, type]:
         """Infer schema from CSV documents."""
         if not documents:
             return {}
-        
+
         schema = {}
         sample_doc = documents[0]
-        
+
         for key, value in sample_doc.items():
             if value is None:
                 schema[key] = type(None)
             else:
                 schema[key] = type(value)
-        
+
         return schema
 
 
 def detect_file_format(file_path: Path) -> FileFormat:
     """Detect file format from extension and content."""
-    
+
     extension = file_path.suffix.lower()
-    
-    if extension == '.json':
+
+    if extension == ".json":
         return FileFormat.JSON
-    elif extension == '.csv':
+    elif extension == ".csv":
         return FileFormat.CSV
-    
+
     # Content-based detection
     try:
-        with open(file_path, 'r') as f:
+        with open(file_path, "r") as f:
             first_char = f.read(1).strip()
-            if first_char in ['{', '[']:
+            if first_char in ["{", "["]:
                 return FileFormat.JSON
             else:
                 return FileFormat.CSV
@@ -275,88 +287,93 @@ def get_loader(file_format: FileFormat) -> DocumentLoader:
 # Document Transformer
 # ============================================================================
 
+
 class DocumentTransformer:
     """Transform and enrich documents before upload."""
-    
+
     def __init__(self, options: UploadOptions):
         self.options = options
-    
+
     def transform(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Transform documents with metadata and normalization."""
         transformed = []
-        
+
         for doc in documents:
             # Add metadata
             if self.options.add_metadata:
                 doc = self._add_metadata(doc)
-            
+
             # Normalize types
             doc = self._normalize_types(doc)
-            
+
             transformed.append(doc)
-        
+
         return transformed
-    
+
     def _add_metadata(self, doc: Dict[str, Any]) -> Dict[str, Any]:
         """Add metadata fields."""
         prefix = self.options.metadata_prefix
-        
+
         # Add upload timestamp if not present
         if f"{prefix}_uploaded_at" not in doc:
             doc[f"{prefix}_uploaded_at"] = datetime.now(timezone.utc)
-        
+
         # Preserve original created_date if present
         if "created_date" not in doc:
-            doc["created_date"] = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
-        
+            doc["created_date"] = datetime.now(timezone.utc).strftime(
+                "%Y-%m-%d %H:%M:%S"
+            )
+
         return doc
-    
+
     def _normalize_types(self, doc: Dict[str, Any]) -> Dict[str, Any]:
         """Normalize types for MongoDB compatibility."""
         normalized = {}
-        
+
         for key, value in doc.items():
             # Handle numpy types (from pandas)
-            if hasattr(value, 'item'):  # numpy scalar
+            if hasattr(value, "item"):  # numpy scalar
                 value = value.item()
-            
+
             # Convert pandas Timestamp to datetime
-            if hasattr(value, 'to_pydatetime'):
+            if hasattr(value, "to_pydatetime"):
                 value = value.to_pydatetime()
-            
+
             # Keep None as None (not empty string)
             if pd.isna(value):
                 value = None
-            
+
             normalized[key] = value
-        
+
         return normalized
-    
+
     def infer_unique_fields(self, documents: List[Dict[str, Any]]) -> List[str]:
         """Intelligently infer unique identifier fields."""
         if not documents:
             return []
-        
+
         # Common unique field patterns
-        candidate_fields = ['model', 'name', 'id', 'timestamp', 'date', 'pipeline']
-        
+        candidate_fields = ["model", "name", "id", "timestamp", "date", "pipeline"]
+
         available_fields = set(documents[0].keys())
         unique_fields = []
-        
+
         for field in candidate_fields:
             if field in available_fields:
                 # Check if field has unique values
                 values = [doc.get(field) for doc in documents[:100]]  # Sample
-                if len(set(str(v) for v in values if v is not None)) == len([v for v in values if v is not None]):
+                if len(set(str(v) for v in values if v is not None)) == len(
+                    [v for v in values if v is not None]
+                ):
                     unique_fields.append(field)
                     break  # Found a unique field
-        
+
         # If no single unique field, try combinations
-        if not unique_fields and 'model' in available_fields:
-            unique_fields = ['model']
-            if 'timestamp' in available_fields:
-                unique_fields.append('timestamp')
-        
+        if not unique_fields and "model" in available_fields:
+            unique_fields = ["model"]
+            if "timestamp" in available_fields:
+                unique_fields.append("timestamp")
+
         return unique_fields
 
 
@@ -364,90 +381,87 @@ def infer_unique_fields(self, documents: List[Dict[str, Any]]) -> List[str]:
 # MongoDB Uploader
 # ============================================================================
 
+
 class MongoDBUploader:
     """Handles MongoDB connection and bulk upload operations."""
-    
+
     def __init__(self, config: MongoDBConfig):
         self.config = config
         self.client: Optional[pymongo.MongoClient] = None
-    
+
     def __enter__(self):
         """Context manager entry."""
         self.connect()
         return self
-    
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Context manager exit."""
         self.disconnect()
-    
+
     def connect(self):
         """Establish MongoDB connection."""
         logger.info(f"Connecting to MongoDB at {self.config.host}:{self.config.port}")
-        
+
         self.client = pymongo.MongoClient(
-            self.config.uri,
-            serverSelectionTimeoutMS=self.config.timeout_ms
+            self.config.uri, serverSelectionTimeoutMS=self.config.timeout_ms
         )
-        
+
         # Test connection
         self.client.server_info()
         logger.info("✅ Connected to MongoDB")
-    
+
     def disconnect(self):
         """Close MongoDB connection."""
         if self.client:
             self.client.close()
             logger.info("Disconnected from MongoDB")
-    
+
     def upload(
         self,
         documents: List[Dict[str, Any]],
         database_name: str,
         collection_name: str,
-        options: UploadOptions
+        options: UploadOptions,
     ) -> UploadResult:
         """Upload documents to MongoDB with bulk operations."""
-        
+
         start_time = datetime.now()
-        
+
         # Get collection
         db = self.client[database_name]
         collection = db[collection_name]
-        
+
         # Create indexes if requested
         if options.create_indexes:
             self._create_indexes(collection, documents, options)
-        
+
         # Perform bulk upload
         result = self._bulk_upload(collection, documents, options)
-        
+
         # Calculate duration
         result.duration_seconds = (datetime.now() - start_time).total_seconds()
-        
+
         return result
-    
+
     def _create_indexes(
-        self,
-        collection,
-        documents: List[Dict[str, Any]],
-        options: UploadOptions
+        self, collection, documents: List[Dict[str, Any]], options: UploadOptions
     ):
         """Create indexes for efficient querying."""
         if not documents:
             return
-        
+
         # Determine fields to index
         index_fields = options.index_fields or []
-        
+
         if not index_fields and options.unique_fields:
             index_fields = options.unique_fields
-        
+
         # Auto-detect common index candidates
         if not index_fields:
-            common_index_fields = ['model', 'timestamp', 'date', 'status', 'pipeline']
+            common_index_fields = ["model", "timestamp", "date", "status", "pipeline"]
             available = set(documents[0].keys())
             index_fields = [f for f in common_index_fields if f in available]
-        
+
         # Create indexes
         for field in index_fields:
             try:
@@ -455,7 +469,7 @@ def _create_indexes(
                 logger.info(f"Created index on field: {field}")
             except PyMongoError as e:
                 logger.warning(f"Could not create index on {field}: {e}")
-        
+
         # Create compound index for unique fields
         if options.unique_fields and len(options.unique_fields) > 1:
             try:
@@ -464,20 +478,17 @@ def _create_indexes(
                 logger.info(f"Created compound index on: {options.unique_fields}")
             except PyMongoError as e:
                 logger.warning(f"Could not create compound index: {e}")
-    
+
     def _bulk_upload(
-        self,
-        collection,
-        documents: List[Dict[str, Any]],
-        options: UploadOptions
+        self, collection, documents: List[Dict[str, Any]], options: UploadOptions
     ) -> UploadResult:
         """Perform bulk upload with batching."""
-        
+
         total_inserted = 0
         total_updated = 0
         total_failed = 0
         errors = []
-        
+
         # Prepare bulk operations
         if options.upsert and options.unique_fields:
             operations = self._build_upsert_operations(documents, options.unique_fields)
@@ -487,10 +498,10 @@ def _bulk_upload(
                 result = collection.insert_many(documents, ordered=options.ordered)
                 total_inserted = len(result.inserted_ids)
             except BulkWriteError as e:
-                total_inserted = e.details.get('nInserted', 0)
-                total_failed = len(e.details.get('writeErrors', []))
-                errors = [err['errmsg'] for err in e.details.get('writeErrors', [])]
-            
+                total_inserted = e.details.get("nInserted", 0)
+                total_failed = len(e.details.get("writeErrors", []))
+                errors = [err["errmsg"] for err in e.details.get("writeErrors", [])]
+
             return UploadResult(
                 status="success" if total_failed == 0 else "partial",
                 documents_read=len(documents),
@@ -498,44 +509,49 @@ def _bulk_upload(
                 documents_inserted=total_inserted,
                 documents_updated=0,
                 documents_failed=total_failed,
-                errors=errors
+                errors=errors,
             )
-        
+
         # Batched bulk write for upsert operations
         batch_size = options.batch_size
-        
+
         with Progress(
             SpinnerColumn(),
             TextColumn("[progress.description]{task.description}"),
             BarColumn(),
             TaskProgressColumn(),
-            console=console
+            console=console,
         ) as progress:
-            
+
             task = progress.add_task(
-                f"Uploading to {collection.name}...",
-                total=len(operations)
+                f"Uploading to {collection.name}...", total=len(operations)
             )
-            
+
             for i in range(0, len(operations), batch_size):
-                batch = operations[i:i + batch_size]
-                
+                batch = operations[i : i + batch_size]
+
                 try:
                     result = collection.bulk_write(batch, ordered=options.ordered)
                     total_inserted += result.upserted_count
                     total_updated += result.modified_count
-                    
+
                 except BulkWriteError as e:
-                    total_inserted += e.details.get('nUpserted', 0)
-                    total_updated += e.details.get('nModified', 0)
-                    write_errors = e.details.get('writeErrors', [])
+                    total_inserted += e.details.get("nUpserted", 0)
+                    total_updated += e.details.get("nModified", 0)
+                    write_errors = e.details.get("writeErrors", [])
                     total_failed += len(write_errors)
-                    errors.extend([err['errmsg'] for err in write_errors[:5]])  # Limit error messages
-                
+                    errors.extend(
+                        [err["errmsg"] for err in write_errors[:5]]
+                    )  # Limit error messages
+
                 progress.update(task, advance=len(batch))
-        
-        status = "success" if total_failed == 0 else ("partial" if total_inserted + total_updated > 0 else "failed")
-        
+
+        status = (
+            "success"
+            if total_failed == 0
+            else ("partial" if total_inserted + total_updated > 0 else "failed")
+        )
+
         return UploadResult(
             status=status,
             documents_read=len(documents),
@@ -543,34 +559,26 @@ def _bulk_upload(
             documents_inserted=total_inserted,
             documents_updated=total_updated,
             documents_failed=total_failed,
-            errors=errors
+            errors=errors,
         )
-    
+
     def _build_upsert_operations(
-        self,
-        documents: List[Dict[str, Any]],
-        unique_fields: List[str]
+        self, documents: List[Dict[str, Any]], unique_fields: List[str]
     ) -> List[UpdateOne]:
         """Build bulk upsert operations."""
         operations = []
-        
+
         for doc in documents:
             # Build filter from unique fields
             filter_doc = {field: doc[field] for field in unique_fields if field in doc}
-            
+
             if not filter_doc:
                 # No unique fields, skip or insert
                 continue
-            
+
             # Upsert operation
-            operations.append(
-                UpdateOne(
-                    filter_doc,
-                    {"$set": doc},
-                    upsert=True
-                )
-            )
-        
+            operations.append(UpdateOne(filter_doc, {"$set": doc}, upsert=True))
+
         return operations
 
 
@@ -578,16 +586,17 @@ def _build_upsert_operations(
 # Main Upload Function
 # ============================================================================
 
+
 def upload_file_to_mongodb(
     file_path: str,
     database_name: str,
     collection_name: str,
     config: Optional[MongoDBConfig] = None,
-    options: Optional[UploadOptions] = None
+    options: Optional[UploadOptions] = None,
 ) -> UploadResult:
     """
     Upload CSV or JSON file to MongoDB with intelligent handling.
-    
+
     This is the main entry point for file uploads.
 
     Args:
@@ -599,7 +608,7 @@ def upload_file_to_mongodb(
 
     Returns:
         UploadResult with operation details
-        
+
     Raises:
         FileNotFoundError: If file doesn't exist
         ValueError: If file format is invalid
@@ -609,43 +618,49 @@ def upload_file_to_mongodb(
     file_path = Path(file_path)
     if not file_path.exists():
         raise FileNotFoundError(f"File not found: {file_path}")
-    
+
     config = config or MongoDBConfig.from_env()
     options = options or UploadOptions()
-    
+
     # Detect format and load documents
     file_format = detect_file_format(file_path)
     loader = get_loader(file_format)
-    
-    console.print(f"📂 Loading {file_format.value.upper()} file: [cyan]{file_path.name}[/cyan]")
+
+    console.print(
+        f"📂 Loading {file_format.value.upper()} file: [cyan]{file_path.name}[/cyan]"
+    )
     documents = loader.load(file_path)
-    
+
     if not documents:
         raise ValueError(f"No documents found in {file_path}")
-    
+
     console.print(f"✅ Loaded {len(documents)} documents")
-    
+
     # Transform documents
     transformer = DocumentTransformer(options)
-    
+
     # Infer unique fields if not specified
     if options.unique_fields is None:
         options.unique_fields = transformer.infer_unique_fields(documents)
         if options.unique_fields:
-            console.print(f"🔑 Auto-detected unique fields: [yellow]{', '.join(options.unique_fields)}[/yellow]")
-    
+            console.print(
+                f"🔑 Auto-detected unique fields: [yellow]{', '.join(options.unique_fields)}[/yellow]"
+            )
+
     documents = transformer.transform(documents)
-    
+
     # Handle dry-run before connecting to MongoDB
     if options.dry_run:
-        console.print(f"\n🔍 [yellow]DRY RUN: Would upload {len(documents)} documents[/yellow]")
+        console.print(
+            f"\n🔍 [yellow]DRY RUN: Would upload {len(documents)} documents[/yellow]"
+        )
         console.print(f"   Database: {database_name}")
         console.print(f"   Collection: {collection_name}")
         if options.unique_fields:
             console.print(f"   Unique fields: {', '.join(options.unique_fields)}")
         console.print(f"   Upsert: {options.upsert}")
         console.print(f"   Create indexes: {options.create_indexes}")
-        
+
         return UploadResult(
             status="success",
             documents_read=len(documents),
@@ -653,18 +668,18 @@ def upload_file_to_mongodb(
             documents_inserted=0,
             documents_updated=0,
             documents_failed=0,
-            duration_seconds=0.0
+            duration_seconds=0.0,
         )
-    
+
     # Upload to MongoDB
     with MongoDBUploader(config) as uploader:
         result = uploader.upload(
             documents=documents,
             database_name=database_name,
             collection_name=collection_name,
-            options=options
+            options=options,
         )
-    
+
     return result
 
 
@@ -672,42 +687,45 @@ def upload_file_to_mongodb(
 # Legacy Compatibility
 # ============================================================================
 
+
 def upload_csv_to_mongodb(
     csv_file_path: str,
     database_name: str,
     collection_name: str,
-    mongo_config: Optional[MongoDBConfig] = None
+    mongo_config: Optional[MongoDBConfig] = None,
 ) -> Dict[str, Any]:
     """
     Upload CSV data to MongoDB collection.
-    
+
     DEPRECATED: Use upload_file_to_mongodb() instead.
     This function is kept for backward compatibility.
-    
+
     Args:
         csv_file_path: Path to CSV file
         database_name: Name of MongoDB database
         collection_name: Name of MongoDB collection
         mongo_config: MongoDB configuration (uses environment if None)
-        
+
     Returns:
         Dictionary with operation results
     """
-    logger.warning("upload_csv_to_mongodb is deprecated. Use upload_file_to_mongodb instead.")
-    
+    logger.warning(
+        "upload_csv_to_mongodb is deprecated. Use upload_file_to_mongodb instead."
+    )
+
     result = upload_file_to_mongodb(
         file_path=csv_file_path,
         database_name=database_name,
         collection_name=collection_name,
         config=mongo_config,
-        options=UploadOptions()
+        options=UploadOptions(),
     )
-    
+
     # Convert UploadResult to legacy dict format
     return {
         "status": "success" if result.status == "success" else "partial",
-            "database": database_name,
-            "collection": collection_name,
+        "database": database_name,
+        "collection": collection_name,
         "records_processed": result.documents_processed,
     }
 
@@ -715,7 +733,7 @@ def upload_csv_to_mongodb(
 class MongoDBHandler:
     """
     Legacy handler class for MongoDB operations.
-    
+
     DEPRECATED: This class is kept for backward compatibility.
     Use upload_file_to_mongodb() directly instead.
     """
@@ -728,15 +746,19 @@ def __init__(self, args):
         self.config = MongoDBConfig.from_env()
         self.database_name = args.database_name
         self.collection_name = args.collection_name
-        
+
         # Support both old and new parameter names
-        self.file_path = getattr(args, 'file_path', None) or getattr(args, 'csv_file_path', None)
-        self.unique_key = getattr(args, 'unique_key', None)
+        self.file_path = getattr(args, "file_path", None) or getattr(
+            args, "csv_file_path", None
+        )
+        self.unique_key = getattr(args, "unique_key", None)
         self.return_status = False
 
     def run(self) -> bool:
         """Execute the MongoDB upload operation."""
-        logger.warning("MongoDBHandler is deprecated. Use upload_file_to_mongodb instead.")
+        logger.warning(
+            "MongoDBHandler is deprecated. Use upload_file_to_mongodb instead."
+        )
 
         console.print("\n" + "=" * 80)
         console.print("[bold blue]📤 UPLOADING TO MONGODB[/bold blue]")
@@ -744,24 +766,26 @@ def run(self) -> bool:
         console.print(f"📂 File: [cyan]{self.file_path}[/cyan]")
         console.print(f"🗄️  Database: [cyan]{self.database_name}[/cyan]")
         console.print(f"📊 Collection: [cyan]{self.collection_name}[/cyan]")
-        
+
         try:
             # Parse unique fields if provided
             unique_fields = None
             if self.unique_key:
-                unique_fields = [k.strip() for k in self.unique_key.split(',')]
-            
+                unique_fields = [k.strip() for k in self.unique_key.split(",")]
+
             options = UploadOptions(unique_fields=unique_fields)
-            
+
             result = upload_file_to_mongodb(
                 file_path=self.file_path,
                 database_name=self.database_name,
                 collection_name=self.collection_name,
                 config=self.config,
-                options=options
+                options=options,
             )
 
-            console.print(f"✅ [bold green]Successfully processed {result.documents_processed} documents[/bold green]")
+            console.print(
+                f"✅ [bold green]Successfully processed {result.documents_processed} documents[/bold green]"
+            )
             console.print(f"   Inserted: {result.documents_inserted}")
             console.print(f"   Updated: {result.documents_updated}")
             console.print("=" * 80 + "\n")
@@ -773,7 +797,9 @@ def run(self) -> bool:
             self.return_status = False
         except ConnectionFailure as e:
             console.print(f"[bold red]❌ MongoDB connection failed:[/bold red] {e}")
-            console.print("[yellow]💡 Tip: Check MONGO_HOST, MONGO_PORT, MONGO_USER, MONGO_PASSWORD[/yellow]")
+            console.print(
+                "[yellow]💡 Tip: Check MONGO_HOST, MONGO_PORT, MONGO_USER, MONGO_PASSWORD[/yellow]"
+            )
             self.return_status = False
         except ValueError as e:
             console.print(f"[bold red]❌ Invalid file:[/bold red] {e}")
diff --git a/src/madengine/deployment/__init__.py b/src/madengine/deployment/__init__.py
index c48e99b8..02618530 100644
--- a/src/madengine/deployment/__init__.py
+++ b/src/madengine/deployment/__init__.py
@@ -13,12 +13,7 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-from .base import (
-    BaseDeployment,
-    DeploymentConfig,
-    DeploymentResult,
-    DeploymentStatus,
-)
+from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus
 from .factory import DeploymentFactory
 
 __all__ = [
@@ -28,4 +23,3 @@
     "DeploymentStatus",
     "DeploymentFactory",
 ]
-
diff --git a/src/madengine/deployment/base.py b/src/madengine/deployment/base.py
index a032c037..a94cbd4e 100644
--- a/src/madengine/deployment/base.py
+++ b/src/madengine/deployment/base.py
@@ -19,7 +19,6 @@
 from jinja2 import Environment, FileSystemLoader
 from rich.console import Console
 
-
 # Regex for parsing "performance: <value> <metric>" log lines.
 # Value: optional sign, integer/decimal, scientific notation (e or E).
 # Separator: optional unit suffix (/[a-zA-Z]+) and/or comma, in any order —
@@ -205,7 +204,9 @@ def execute(self) -> DeploymentResult:
                     metrics = self.collect_results(result.deployment_id)
                     result.metrics = metrics
                 except Exception as e:
-                    self.console.print(f"[yellow]Warning: Could not collect results for {result.deployment_id}: {e}[/yellow]")
+                    self.console.print(
+                        f"[yellow]Warning: Could not collect results for {result.deployment_id}: {e}[/yellow]"
+                    )
                     # Ensure empty metrics dict exists even if collection fails
                     result.metrics = {"successful_runs": [], "failed_runs": []}
 
@@ -214,7 +215,9 @@ def execute(self) -> DeploymentResult:
         except KeyboardInterrupt:
             if result is not None and getattr(result, "deployment_id", None):
                 self.cleanup(result.deployment_id)
-                self.console.print("\n[yellow]Cancelled deployment and cleaned up resources.[/yellow]")
+                self.console.print(
+                    "\n[yellow]Cancelled deployment and cleaned up resources.[/yellow]"
+                )
             raise
         except Exception as e:
             self.console.print(f"[red]Deployment error: {e}[/red]")
@@ -239,13 +242,15 @@ def _monitor_until_complete(self, deployment_id: str) -> DeploymentResult:
         while True:
             status = self.monitor(deployment_id)
 
-            if status.status in [DeploymentStatus.SUCCESS, DeploymentStatus.FAILED, DeploymentStatus.UNKNOWN]:
+            if status.status in [
+                DeploymentStatus.SUCCESS,
+                DeploymentStatus.FAILED,
+                DeploymentStatus.UNKNOWN,
+            ]:
                 return status
 
             # Still running, wait and check again
-            self.console.print(
-                f"  Status: {status.status.value} - {status.message}"
-            )
+            self.console.print(f"  Status: {status.status.value} - {status.message}")
             time.sleep(30)  # Check every 30 seconds
 
     # Abstract methods to be implemented by subclasses
@@ -388,7 +393,7 @@ def _parse_performance_from_log(
             return None
 
         value = float(match.group(1))
-        metric = match.group(2).rstrip(',')
+        metric = match.group(2).rstrip(",")
 
         node_id_pattern = r"node_id:\s*(\d+)"
         node_match = re.search(node_id_pattern, log_content)
@@ -503,7 +508,9 @@ def _aggregate_node_metrics(
             aggregated_value = sum(m["performance"] for m in per_node_metrics)
             method_desc = "sum_across_nodes"
         elif aggregation_method == "average":
-            aggregated_value = statistics.mean(m["performance"] for m in per_node_metrics)
+            aggregated_value = statistics.mean(
+                m["performance"] for m in per_node_metrics
+            )
             method_desc = "average_across_nodes"
         elif aggregation_method == "max":
             aggregated_value = max(m["performance"] for m in per_node_metrics)
@@ -543,7 +550,8 @@ def _aggregate_node_metrics(
         durations = [
             m.get("duration", m.get("test_duration", "N/A"))
             for m in per_node_metrics
-            if m.get("duration", "N/A") != "N/A" or m.get("test_duration", "N/A") != "N/A"
+            if m.get("duration", "N/A") != "N/A"
+            or m.get("test_duration", "N/A") != "N/A"
         ]
         if durations:
             duration_values = []
@@ -558,7 +566,9 @@ def _aggregate_node_metrics(
             duration = "N/A"
 
         total_gpus = sum(m.get("local_gpus", 1) for m in per_node_metrics)
-        gpus_per_node = per_node_metrics[0].get("local_gpus", 1) if per_node_metrics else 1
+        gpus_per_node = (
+            per_node_metrics[0].get("local_gpus", 1) if per_node_metrics else 1
+        )
 
         aggregated_record = {
             "model": first_metric["model"],
@@ -660,4 +670,3 @@ def _write_to_perf_csv(self, perf_data: Dict[str, Any]) -> None:
                 writer = csv.DictWriter(f, fieldnames=headers, extrasaction="ignore")
                 writer.writeheader()
                 writer.writerow(row_to_write)
-
diff --git a/src/madengine/deployment/common.py b/src/madengine/deployment/common.py
index 5b898960..e7574e31 100644
--- a/src/madengine/deployment/common.py
+++ b/src/madengine/deployment/common.py
@@ -21,7 +21,7 @@
     "primus",
     "vllm",
     "sglang",
-    "sglang-disagg"
+    "sglang-disagg",
 ]
 
 # Tool names that use rocprof / rocprofv3 wrapping and need MPI-aware rocprofv3 on multi-node.
@@ -98,10 +98,7 @@ def is_rocprofv3_available() -> bool:
     """
     try:
         result = subprocess.run(
-            ["rocprofv3", "--help"],
-            capture_output=True,
-            text=True,
-            timeout=5
+            ["rocprofv3", "--help"], capture_output=True, text=True, timeout=5
         )
         return result.returncode == 0
     except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
@@ -109,9 +106,7 @@ def is_rocprofv3_available() -> bool:
 
 
 def configure_multi_node_profiling(
-    nnodes: int,
-    tools_config: List[Dict],
-    logger: Any
+    nnodes: int, tools_config: List[Dict], logger: Any
 ) -> Dict[str, Any]:
     """
     Configure profiling for multi-node runs with rocprofv3 support.
@@ -148,7 +143,7 @@ def configure_multi_node_profiling(
             "enabled": True,
             "mode": "single_node",
             "tools": tools_config,
-            "per_node_collection": False
+            "per_node_collection": False,
         }
 
     if not is_rocprofv3_available():
@@ -156,7 +151,8 @@ def configure_multi_node_profiling(
             filtered_tools: List[Dict] = [
                 t
                 for t in tools_config
-                if isinstance(t, dict) and t.get("name") not in _ROCPROF_FAMILY_TOOL_NAMES
+                if isinstance(t, dict)
+                and t.get("name") not in _ROCPROF_FAMILY_TOOL_NAMES
             ]
             if filtered_tools:
                 logger.warning(
@@ -197,7 +193,7 @@ def configure_multi_node_profiling(
                 "enabled": False,
                 "mode": "multi_node_unsupported",
                 "tools": [],
-                "per_node_collection": False
+                "per_node_collection": False,
             }
         logger.info(
             "Multi-node: rocprofv3 not found on submission host; keeping non-rocprof tools "
@@ -210,7 +206,9 @@ def configure_multi_node_profiling(
             "per_node_collection": True,
         }
 
-    logger.info(f"✓ Multi-node profiling enabled for {nnodes} nodes (rocprofv3 detected)")
+    logger.info(
+        f"✓ Multi-node profiling enabled for {nnodes} nodes (rocprofv3 detected)"
+    )
 
     upgraded_tools: List[Dict] = []
     for tool in tools_config:
@@ -232,9 +230,13 @@ def configure_multi_node_profiling(
         tool_names = [
             t.get("name") if isinstance(t, dict) else str(t) for t in upgraded_tools
         ]
-        logger.info(f"  → Multi-node profiling tools: {', '.join(filter(None, tool_names))}")
+        logger.info(
+            f"  → Multi-node profiling tools: {', '.join(filter(None, tool_names))}"
+        )
         if "rccl_trace" in tool_names:
-            logger.info("  → ✓ rccl_trace enabled (critical for multi-node communication profiling)")
+            logger.info(
+                "  → ✓ rccl_trace enabled (critical for multi-node communication profiling)"
+            )
 
     return {
         "enabled": True,
@@ -242,5 +244,5 @@ def configure_multi_node_profiling(
         "tools": upgraded_tools,
         "per_node_collection": True,
         "profiler": "rocprofv3",
-        "wrapper_mode": "launcher"
+        "wrapper_mode": "launcher",
     }
diff --git a/src/madengine/deployment/config_loader.py b/src/madengine/deployment/config_loader.py
index 06d8a1b1..9e7ccf97 100644
--- a/src/madengine/deployment/config_loader.py
+++ b/src/madengine/deployment/config_loader.py
@@ -11,12 +11,14 @@
 """
 
 import json
+from copy import deepcopy
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional
-from copy import deepcopy
 
 
-def apply_deployment_config(config: Any, load_fn: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
+def apply_deployment_config(
+    config: Any, load_fn: Callable[[Dict[str, Any]], Dict[str, Any]]
+) -> Dict[str, Any]:
     """Apply deployment defaults via a loader and set config.additional_context.
 
     Used by SLURM and Kubernetes deployment classes before calling super().__init__(config).
@@ -35,97 +37,103 @@ def apply_deployment_config(config: Any, load_fn: Callable[[Dict[str, Any]], Dic
 
 class ConfigLoader:
     """Smart configuration loader with preset support."""
-    
+
     PRESET_DIR = Path(__file__).parent / "presets"
-    
+
     @classmethod
     def load_preset(cls, preset_path: str) -> Dict[str, Any]:
         """
         Load a preset JSON file.
-        
+
         Args:
             preset_path: Relative path to preset file from PRESET_DIR
-            
+
         Returns:
             Dict containing preset configuration, or empty dict if not found
         """
         full_path = cls.PRESET_DIR / preset_path
         if not full_path.exists():
             return {}
-        
+
         try:
-            with open(full_path, 'r') as f:
+            with open(full_path, "r") as f:
                 return json.load(f)
         except (json.JSONDecodeError, IOError) as e:
             print(f"Warning: Could not load preset {preset_path}: {e}")
             return {}
-    
+
     @classmethod
     def deep_merge(cls, base: Dict, override: Dict) -> Dict:
         """
         Deep merge two dictionaries. Override wins conflicts.
         Nested dicts are merged, lists/primitives are replaced.
         Special handling: env_vars are merged (not replaced).
-        
+
         Args:
             base: Base dictionary
             override: Override dictionary
-            
+
         Returns:
             Merged dictionary
         """
         result = deepcopy(base)
-        
+
         for key, value in override.items():
             # Skip documentation/comment fields from base if override has them
-            if key.startswith('_'):
+            if key.startswith("_"):
                 result[key] = deepcopy(value)
                 continue
-                
-            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+
+            if (
+                key in result
+                and isinstance(result[key], dict)
+                and isinstance(value, dict)
+            ):
                 # Recursively merge nested dicts
                 result[key] = cls.deep_merge(result[key], value)
             else:
                 # Replace with override value
                 result[key] = deepcopy(value)
-        
+
         return result
-    
+
     @classmethod
     def detect_profile_needs(cls, config: Dict) -> Dict[str, bool]:
         """
         Detect what profiles/optimizations are needed.
-        
+
         Args:
             config: Configuration dictionary
-            
+
         Returns:
             Dict with flags: is_single_gpu, is_multi_gpu, is_multi_node, is_distributed
         """
         distributed = config.get("distributed", {})
         gpu_count = config.get("k8s", {}).get("gpu_count", 1)
         nnodes = distributed.get("nnodes", 1)
-        
-        is_distributed = distributed.get("enabled", False) or distributed.get("launcher")
+
+        is_distributed = distributed.get("enabled", False) or distributed.get(
+            "launcher"
+        )
         is_multi_gpu = gpu_count > 1 or is_distributed
         is_multi_node = nnodes > 1
-        
+
         return {
             "is_single_gpu": gpu_count == 1 and not is_distributed,
             "is_multi_gpu": is_multi_gpu and not is_multi_node,
             "is_multi_node": is_multi_node,
-            "is_distributed": is_distributed
+            "is_distributed": is_distributed,
         }
-    
+
     @classmethod
     def select_profile(cls, config: Dict, needs: Dict[str, bool]) -> Optional[str]:
         """
         Auto-select k8s profile based on configuration needs.
-        
+
         Args:
             config: Configuration dictionary
             needs: Profile needs from detect_profile_needs()
-            
+
         Returns:
             Profile filename or None
         """
@@ -135,82 +143,82 @@ def select_profile(cls, config: Dict, needs: Dict[str, bool]) -> Optional[str]:
             return "k8s/profiles/multi-gpu.json"
         elif needs["is_single_gpu"]:
             return "k8s/profiles/single-gpu.json"
-        
+
         return None
-    
+
     @classmethod
     def load_k8s_config(cls, user_config: Dict[str, Any]) -> Dict[str, Any]:
         """
         Load complete k8s configuration with multi-layer merging.
-        
+
         Layers:
         1. Base k8s defaults
         2. GPU vendor base preset
         3. GPU vendor multi-GPU preset (if needed)
         4. Profile preset (single-gpu/multi-gpu/multi-node)
         5. User configuration (already merged from file + CLI)
-        
+
         Args:
             user_config: User-provided configuration (merged from file + CLI)
-            
+
         Returns:
             Complete configuration with all defaults applied
         """
         # Layer 1: Base defaults
         config = cls.load_preset("k8s/defaults.json")
-        
+
         # Merge user config temporarily to detect requirements
         temp_config = cls.deep_merge(config, user_config)
         needs = cls.detect_profile_needs(temp_config)
-        
+
         # Layer 2: GPU vendor base preset
         gpu_vendor = temp_config.get("gpu_vendor", "AMD").upper()
         vendor_file = f"k8s/gpu-vendors/{gpu_vendor.lower()}.json"
         vendor_preset = cls.load_preset(vendor_file)
         config = cls.deep_merge(config, vendor_preset)
-        
+
         # Layer 3: GPU vendor multi-GPU optimizations (AMD only, when needed)
         if gpu_vendor == "AMD" and (needs["is_multi_gpu"] or needs["is_multi_node"]):
             amd_multi_preset = cls.load_preset("k8s/gpu-vendors/amd-multi-gpu.json")
             config = cls.deep_merge(config, amd_multi_preset)
-        
+
         # Layer 4: Profile preset based on detected needs
         profile_file = cls.select_profile(temp_config, needs)
         if profile_file:
             profile_preset = cls.load_preset(profile_file)
             config = cls.deep_merge(config, profile_preset)
-        
+
         # Layer 5: User configuration (highest priority)
         config = cls.deep_merge(config, user_config)
-        
+
         return config
-    
+
     @classmethod
     def load_slurm_config(cls, user_config: Dict[str, Any]) -> Dict[str, Any]:
         """
         Load complete SLURM configuration with multi-layer merging.
-        
+
         Layers:
         1. Base SLURM defaults
         2. Profile preset (single-node/multi-node)
         3. User configuration (already merged from file + CLI)
-        
+
         Args:
             user_config: User-provided configuration
-            
+
         Returns:
             Complete configuration with defaults applied
         """
         # Layer 1: Base defaults
         config = cls.load_preset("slurm/defaults.json")
-        
+
         # Merge user config temporarily to detect requirements
         temp_config = cls.deep_merge(config, user_config)
-        
+
         # Layer 2: Profile preset based on detected configuration
         slurm_config = temp_config.get("slurm", {})
         nodes = slurm_config.get("nodes", 1)
-        
+
         # Select profile based on node count
         if nodes > 1:
             profile_preset = cls.load_preset("slurm/profiles/multi-node.json")
@@ -218,39 +226,39 @@ def load_slurm_config(cls, user_config: Dict[str, Any]) -> Dict[str, Any]:
         else:
             profile_preset = cls.load_preset("slurm/profiles/single-node.json")
             config = cls.deep_merge(config, profile_preset)
-        
+
         # Layer 3: User configuration (highest priority)
         config = cls.deep_merge(config, user_config)
-        
+
         return config
-    
+
     @classmethod
     def infer_and_validate_deploy_type(cls, user_config: Dict[str, Any]) -> str:
         """
         Infer deployment type from config structure and validate for conflicts.
-        
+
         Convention over Configuration: Presence of k8s/slurm field indicates deployment intent.
-        
+
         Args:
             user_config: User configuration dictionary
-            
+
         Returns:
             Deployment type: "k8s", "slurm", or "local"
-            
+
         Raises:
             ValueError: If conflicting deployment configs present
         """
         has_k8s = "k8s" in user_config or "kubernetes" in user_config
         has_slurm = "slurm" in user_config
         explicit_deploy = user_config.get("deploy", "").lower()
-        
+
         # Validation Rule 1: Can't have both k8s and slurm configs
         if has_k8s and has_slurm:
             raise ValueError(
                 "Conflicting deployment configuration: Both 'k8s' and 'slurm' fields present. "
                 "Please specify only one deployment target."
             )
-        
+
         # Validation Rule 2: If explicit deploy set, it must match config presence
         if explicit_deploy:
             if explicit_deploy in ["k8s", "kubernetes"] and not has_k8s:
@@ -268,7 +276,7 @@ def infer_and_validate_deploy_type(cls, user_config: Dict[str, Any]) -> str:
                     f"Conflicting deployment: 'deploy' field is 'local' but k8s/slurm config present. "
                     "Remove k8s/slurm config for local execution."
                 )
-        
+
         # Infer deployment type from config presence
         if has_k8s:
             return "k8s"
@@ -276,34 +284,34 @@ def infer_and_validate_deploy_type(cls, user_config: Dict[str, Any]) -> str:
             return "slurm"
         else:
             return "local"
-    
+
     @classmethod
     def load_config(cls, user_config: Dict[str, Any]) -> Dict[str, Any]:
         """
         Load configuration with auto-inferred deploy type and validation.
-        
+
         Infers deployment type from presence of k8s/slurm fields.
         Validates for conflicting configurations.
         Applies appropriate defaults based on deployment type.
-        
+
         Convention over Configuration:
         - Presence of "k8s" field → Kubernetes deployment
         - Presence of "slurm" field → SLURM deployment
         - Neither present → Local execution
         - No explicit "deploy" field needed!
-        
+
         Args:
             user_config: User configuration (from file + CLI merge)
-            
+
         Returns:
             Complete configuration with defaults applied (no deploy field added)
-            
+
         Raises:
             ValueError: If conflicting deployment configs present
         """
         # Infer and validate deployment type
         deploy_type = cls.infer_and_validate_deploy_type(user_config)
-        
+
         # Apply appropriate defaults based on deployment type
         # Note: We do NOT add a "deploy" field - type is inferred from structure
         if deploy_type == "k8s":
@@ -313,4 +321,3 @@ def load_config(cls, user_config: Dict[str, Any]) -> Dict[str, Any]:
         else:
             # Local - return as-is (no deploy field needed)
             return user_config
-
diff --git a/src/madengine/deployment/factory.py b/src/madengine/deployment/factory.py
index dea54557..833ae033 100644
--- a/src/madengine/deployment/factory.py
+++ b/src/madengine/deployment/factory.py
@@ -89,6 +89,7 @@ def register_default_deployments():
         DeploymentFactory.register("kubernetes", KubernetesDeployment)
     except ImportError:
         import warnings
+
         warnings.warn(
             "Kubernetes deployment target is unavailable: the 'kubernetes' library is not "
             "installed. Install it with: pip install madengine[kubernetes] "
@@ -100,4 +101,3 @@ def register_default_deployments():
 
 # Auto-register on module import
 register_default_deployments()
-
diff --git a/src/madengine/deployment/k8s_names.py b/src/madengine/deployment/k8s_names.py
index bfa90569..cbbd9f41 100644
--- a/src/madengine/deployment/k8s_names.py
+++ b/src/madengine/deployment/k8s_names.py
@@ -36,7 +36,9 @@ def _trim_edges_alnum(s: str) -> str:
     return s or "x"
 
 
-def sanitize_k8s_object_name(prefix: str, raw_model_name: str, max_total_len: int = _MAX_OBJECT_NAME_LEN) -> str:
+def sanitize_k8s_object_name(
+    prefix: str, raw_model_name: str, max_total_len: int = _MAX_OBJECT_NAME_LEN
+) -> str:
     """
     Build a valid ``metadata.name`` substring from a model name.
 
@@ -74,7 +76,9 @@ def sanitize_k8s_object_name(prefix: str, raw_model_name: str, max_total_len: in
     room = max_total_len - len(anchor) - 1
     if room < 8:
         # Extreme: prefix alone too long — fall back to hash-only tail
-        return _trim_edges_alnum(f"{digest}-{hashlib.sha256(raw.encode()).hexdigest()[:20]}")[:max_total_len]
+        return _trim_edges_alnum(
+            f"{digest}-{hashlib.sha256(raw.encode()).hexdigest()[:20]}"
+        )[:max_total_len]
 
     tail = body[:room] if room > 0 else ""
     tail = _trim_edges_alnum(tail) if tail else "m"
@@ -84,7 +88,9 @@ def sanitize_k8s_object_name(prefix: str, raw_model_name: str, max_total_len: in
     return _trim_edges_alnum(out)
 
 
-def sanitize_k8s_container_name(name_hint: str, max_len: int = _MAX_DNS_LABEL_LEN) -> str:
+def sanitize_k8s_container_name(
+    name_hint: str, max_len: int = _MAX_DNS_LABEL_LEN
+) -> str:
     """
     Sanitize for ``spec.containers[].name`` / initContainer names.
 
diff --git a/src/madengine/deployment/kubernetes.py b/src/madengine/deployment/kubernetes.py
index 927ec878..437a8ee1 100644
--- a/src/madengine/deployment/kubernetes.py
+++ b/src/madengine/deployment/kubernetes.py
@@ -35,7 +35,13 @@
 
 from jinja2 import Template
 
-from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus, create_jinja_env
+from .base import (
+    BaseDeployment,
+    DeploymentConfig,
+    DeploymentResult,
+    DeploymentStatus,
+    create_jinja_env,
+)
 from .common import (
     configure_multi_node_profiling,
     normalize_launcher,
@@ -57,11 +63,19 @@
 from madengine.core.errors import ConfigurationError
 from madengine.utils.gpu_config import resolve_runtime_gpus
 from madengine.utils.path_utils import get_madengine_root, scripts_base_dir_from
-from madengine.utils.run_details import flatten_tags_in_place, get_build_number, get_pipeline
+from madengine.utils.run_details import (
+    flatten_tags_in_place,
+    get_build_number,
+    get_pipeline,
+)
 
 try:
     from madengine.reporting.update_perf_csv import update_perf_csv
-    from madengine.reporting.update_perf_super import update_perf_super_json, update_perf_super_csv
+    from madengine.reporting.update_perf_super import (
+        update_perf_super_json,
+        update_perf_super_csv,
+    )
+
     REPORTING_AVAILABLE = True
 except ImportError:
     REPORTING_AVAILABLE = False
@@ -78,6 +92,8 @@
 def _pod_job_name_label_selector(deployment_id: str) -> str:
     """Selector for the ``job-name`` pod label; value must be a valid ≤63-char label value."""
     return f"job-name={sanitize_k8s_label_value(deployment_id)}"
+
+
 from .primus_backend import (
     infer_primus_backend_from_model_name,
     infer_primus_examples_overlay_subdirs,
@@ -107,7 +123,9 @@ def match_pvc_subdir_to_k8s_pod(
     return sorted(prefixed)[0]
 
 
-def assign_pvc_subdirs_to_pods(pod_dirs: List[str], pod_names: List[str]) -> Dict[str, str]:
+def assign_pvc_subdirs_to_pods(
+    pod_dirs: List[str], pod_names: List[str]
+) -> Dict[str, str]:
     """
     Assign each PVC subdir to at most one pod. Process longest names first so
     short prefixes do not steal pods (e.g. ``foo-0`` before ``foo``).
@@ -162,8 +180,7 @@ def __init__(self, config: DeploymentConfig):
 
         if not YAML_AVAILABLE:
             raise ImportError(
-                "PyYAML library not installed.\n"
-                "Install with: pip install pyyaml"
+                "PyYAML library not installed.\n" "Install with: pip install pyyaml"
             )
 
         apply_deployment_config(config, ConfigLoader.load_k8s_config)
@@ -208,7 +225,9 @@ def __init__(self, config: DeploymentConfig):
         self.job_name = None
         self.job_label = None  # pod label job-name + label selectors; ≤63 chars (sanitize_k8s_label_value)
         self.service_name = None  # headless Service metadata.name + Pod subdomain; DNS label ≤63 (no dots)
-        self.main_container_name = None  # same string as service_name (container names are DNS labels)
+        self.main_container_name = (
+            None  # same string as service_name (container names are DNS labels)
+        )
         self.configmap_name = None
         self.configmap_yaml = None
         self.job_yaml = None
@@ -347,9 +366,9 @@ def gather_system_env_details(
     ) -> None:
         """
         Gather system environment details by adding rocEnvTool to pre-scripts.
-        
+
         This ensures K8s deployment collects the same system info as local execution.
-        
+
         Args:
             pre_scripts: List of pre-script configurations
             model_name: The model name (used for output file naming)
@@ -357,18 +376,22 @@ def gather_system_env_details(
         # Add rocEnvTool pre-script with model-specific output name
         pre_env_details = {
             "path": "scripts/common/pre_scripts/run_rocenv_tool.sh",
-            "args": model_name.replace("/", "_") + "_env"
+            "args": model_name.replace("/", "_") + "_env",
         }
         pre_scripts.append(pre_env_details)
-        self.console.print(f"[dim]Added rocEnvTool to pre-scripts with args: {pre_env_details['args']}[/dim]")
-    
-    def _add_tool_scripts(self, pre_scripts: List[Dict], post_scripts: List[Dict]) -> None:
+        self.console.print(
+            f"[dim]Added rocEnvTool to pre-scripts with args: {pre_env_details['args']}[/dim]"
+        )
+
+    def _add_tool_scripts(
+        self, pre_scripts: List[Dict], post_scripts: List[Dict]
+    ) -> None:
         """
         Add tool pre/post scripts to execution lists (similar to local execution).
-        
+
         Extracts pre_scripts and post_scripts from tools.json definitions and adds them
         to the pre_scripts and post_scripts lists for execution in K8s pods.
-        
+
         Args:
             pre_scripts: List to append tool pre-scripts to
             post_scripts: List to append tool post-scripts to
@@ -376,95 +399,110 @@ def _add_tool_scripts(self, pre_scripts: List[Dict], post_scripts: List[Dict]) -
         tools_config = self._get_tools_config()
         if not tools_config:
             return
-        
+
         # Load tools.json to get pre/post script definitions
         tools_json_path = get_madengine_root() / "scripts" / "common" / "tools.json"
         if not tools_json_path.exists():
             return
-        
+
         with open(tools_json_path, "r") as f:
             tools_definitions = json.load(f)
-        
+
         # Add pre/post scripts from each configured tool
         for tool in tools_config:
             tool_name = tool.get("name")
             if not tool_name or tool_name not in tools_definitions.get("tools", {}):
                 continue
-            
+
             tool_def = tools_definitions["tools"][tool_name]
-            
+
             # Add pre-scripts (at beginning, like local execution)
             if "pre_scripts" in tool_def:
                 pre_scripts[:0] = tool_def["pre_scripts"]
-            
+
             # Add post-scripts (at end, like local execution)
             if "post_scripts" in tool_def:
                 post_scripts.extend(tool_def["post_scripts"])
-    
+
     def _load_common_scripts(self, script_list: List[Dict]) -> Dict[str, str]:
         """
         Load common script contents from madengine package for embedding in ConfigMap.
-        
+
         Since madengine is not installed in model Docker images, we need to embed
         the common scripts (pre_scripts, post_scripts, and tool wrapper scripts) in the ConfigMap.
-        
+
         Args:
             script_list: List of script configurations with 'path' field
-            
+
         Returns:
             Dict mapping relative script paths to their contents
         """
         import os
+
         script_contents = {}
         madengine_root = get_madengine_root()
-        
+
         for script_config in script_list:
             script_path = script_config.get("path", "")
             if not script_path:
                 continue
-            
+
             # Convert to absolute path from madengine root
             abs_script_path = madengine_root / script_path
-            
+
             if abs_script_path.exists() and abs_script_path.is_file():
                 with open(abs_script_path, "r") as f:
                     script_contents[script_path] = f.read()
                 self.console.print(f"[dim]Loaded common script: {script_path}[/dim]")
-                
+
                 # If it's run_rocenv_tool.sh, also load the entire rocEnvTool directory
                 if "run_rocenv_tool.sh" in script_path:
                     rocenv_dir = abs_script_path.parent / "rocEnvTool"
                     if rocenv_dir.exists() and rocenv_dir.is_dir():
                         # Load all Python files
                         for py_file in rocenv_dir.glob("*.py"):
-                            rel_path = f"scripts/common/pre_scripts/rocEnvTool/{py_file.name}"
+                            rel_path = (
+                                f"scripts/common/pre_scripts/rocEnvTool/{py_file.name}"
+                            )
                             with open(py_file, "r") as f:
                                 script_contents[rel_path] = f.read()
-                            self.console.print(f"[dim]Loaded rocEnvTool file: {rel_path}[/dim]")
-                        
+                            self.console.print(
+                                f"[dim]Loaded rocEnvTool file: {rel_path}[/dim]"
+                            )
+
                         # Load all JSON files (e.g., env_tags.json)
                         for json_file in rocenv_dir.glob("*.json"):
                             rel_path = f"scripts/common/pre_scripts/rocEnvTool/{json_file.name}"
                             with open(json_file, "r") as f:
                                 script_contents[rel_path] = f.read()
-                            self.console.print(f"[dim]Loaded rocEnvTool file: {rel_path}[/dim]")
+                            self.console.print(
+                                f"[dim]Loaded rocEnvTool file: {rel_path}[/dim]"
+                            )
             else:
-                self.console.print(f"[yellow]Warning: Script not found: {script_path} (at {abs_script_path})[/yellow]")
-        
+                self.console.print(
+                    f"[yellow]Warning: Script not found: {script_path} (at {abs_script_path})[/yellow]"
+                )
+
         # Load tool wrapper scripts if tools are configured
         tools_config = self._get_tools_config()
         if tools_config:
-            self._load_tool_wrapper_scripts(script_contents, tools_config, madengine_root)
-        
+            self._load_tool_wrapper_scripts(
+                script_contents, tools_config, madengine_root
+            )
+
         return script_contents
-    
-    def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str], 
-                                   tools_config: List[Dict], madengine_root: Path) -> None:
+
+    def _load_tool_wrapper_scripts(
+        self,
+        script_contents: Dict[str, str],
+        tools_config: List[Dict],
+        madengine_root: Path,
+    ) -> None:
         """
         Load tool wrapper scripts and tools.json for K8s ConfigMap.
-        
+
         This enables profiling tools like rocprof to work in K8s deployments.
-        
+
         Args:
             script_contents: Dict to populate with script contents
             tools_config: List of tool configurations from manifest
@@ -475,28 +513,34 @@ def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str],
         if tools_json_path.exists():
             with open(tools_json_path, "r") as f:
                 tools_definitions = json.load(f)
-                script_contents["scripts/common/tools.json"] = json.dumps(tools_definitions, indent=2)
+                script_contents["scripts/common/tools.json"] = json.dumps(
+                    tools_definitions, indent=2
+                )
             self.console.print(f"[dim]Loaded tools.json[/dim]")
         else:
-            self.console.print(f"[yellow]Warning: tools.json not found at {tools_json_path}[/yellow]")
+            self.console.print(
+                f"[yellow]Warning: tools.json not found at {tools_json_path}[/yellow]"
+            )
             return
-        
+
         # Extract and load wrapper scripts referenced in tool commands
         for tool in tools_config:
             tool_name = tool.get("name")
             if not tool_name:
                 continue
-            
+
             # Get tool definition from tools.json
             if tool_name not in tools_definitions.get("tools", {}):
-                self.console.print(f"[yellow]Warning: Tool '{tool_name}' not found in tools.json[/yellow]")
+                self.console.print(
+                    f"[yellow]Warning: Tool '{tool_name}' not found in tools.json[/yellow]"
+                )
                 continue
-            
+
             tool_def = tools_definitions["tools"][tool_name]
-            
+
             # Extract cmd - could be from tool config override or tool definition
             cmd = tool.get("cmd", tool_def.get("cmd", ""))
-            
+
             # Check if cmd references a script in scripts/common/tools/
             if "scripts/common/tools/" in cmd:
                 # Parse script path from command (e.g., "bash ../scripts/common/tools/rocprof_wrapper.sh --runtime-trace")
@@ -508,29 +552,43 @@ def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str],
                         # Remove ../ prefix if present
                         script_rel_path = part.replace("../", "")
                         abs_script_path = madengine_root / script_rel_path
-                        
+
                         if abs_script_path.exists() and abs_script_path.is_file():
                             with open(abs_script_path, "r") as f:
                                 script_contents[script_rel_path] = f.read()
-                            self.console.print(f"[dim]Loaded tool script: {script_rel_path}[/dim]")
-                            
+                            self.console.print(
+                                f"[dim]Loaded tool script: {script_rel_path}[/dim]"
+                            )
+
                             # If it's a Python script, also load utility modules it might depend on
-                            if script_rel_path.endswith('.py'):
+                            if script_rel_path.endswith(".py"):
                                 tools_dir = abs_script_path.parent
                                 # Load common utility modules that profiling tools depend on
-                                utility_modules = ['amd_smi_utils.py', 'rocm_smi_utils.py', 'pynvml_utils.py']
+                                utility_modules = [
+                                    "amd_smi_utils.py",
+                                    "rocm_smi_utils.py",
+                                    "pynvml_utils.py",
+                                ]
                                 for util_file in utility_modules:
                                     util_path = tools_dir / util_file
                                     if util_path.exists():
-                                        util_rel_path = f"scripts/common/tools/{util_file}"
+                                        util_rel_path = (
+                                            f"scripts/common/tools/{util_file}"
+                                        )
                                         if util_rel_path not in script_contents:
                                             with open(util_path, "r") as f:
-                                                script_contents[util_rel_path] = f.read()
-                                            self.console.print(f"[dim]Loaded tool utility module: {util_rel_path}[/dim]")
+                                                script_contents[util_rel_path] = (
+                                                    f.read()
+                                                )
+                                            self.console.print(
+                                                f"[dim]Loaded tool utility module: {util_rel_path}[/dim]"
+                                            )
                         else:
-                            self.console.print(f"[yellow]Warning: Tool script not found: {script_rel_path} (at {abs_script_path})[/yellow]")
+                            self.console.print(
+                                f"[yellow]Warning: Tool script not found: {script_rel_path} (at {abs_script_path})[/yellow]"
+                            )
                         break
-            
+
             # Also load any tool-specific pre_scripts and post_scripts
             for script_config in tool_def.get("pre_scripts", []):
                 script_path = script_config.get("path", "")
@@ -539,8 +597,10 @@ def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str],
                     if abs_script_path.exists():
                         with open(abs_script_path, "r") as f:
                             script_contents[script_path] = f.read()
-                        self.console.print(f"[dim]Loaded tool pre-script: {script_path}[/dim]")
-            
+                        self.console.print(
+                            f"[dim]Loaded tool pre-script: {script_path}[/dim]"
+                        )
+
             for script_config in tool_def.get("post_scripts", []):
                 script_path = script_config.get("path", "")
                 if script_path and script_path not in script_contents:
@@ -548,8 +608,10 @@ def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str],
                     if abs_script_path.exists():
                         with open(abs_script_path, "r") as f:
                             script_contents[script_path] = f.read()
-                        self.console.print(f"[dim]Loaded tool post-script: {script_path}[/dim]")
-            
+                        self.console.print(
+                            f"[dim]Loaded tool post-script: {script_path}[/dim]"
+                        )
+
             # NEW: Scan pre-scripts for dependencies on scripts/common/tools/ files
             # This handles cases like gpu_info_vram_profiler where the pre-script
             # calls python3 scripts/common/tools/gpu_info_profiler.py but the tool
@@ -564,30 +626,51 @@ def _load_tool_wrapper_scripts(self, script_contents: Dict[str, str],
                             script_content = f.read()
                             # Look for references to scripts/common/tools/ in the pre-script
                             import re
+
                             # Use non-capturing group (?:...) to avoid capturing just the ../ part
-                            tool_refs = re.findall(r'(?:\.\./)?scripts/common/tools/[\w_]+\.py', script_content)
+                            tool_refs = re.findall(
+                                r"(?:\.\./)?scripts/common/tools/[\w_]+\.py",
+                                script_content,
+                            )
                             for tool_ref in tool_refs:
                                 # Clean up the path
-                                tool_script_path = tool_ref.strip('"\'').replace("../", "")
+                                tool_script_path = tool_ref.strip("\"'").replace(
+                                    "../", ""
+                                )
                                 abs_tool_path = madengine_root / tool_script_path
-                                
-                                if abs_tool_path.exists() and tool_script_path not in script_contents:
+
+                                if (
+                                    abs_tool_path.exists()
+                                    and tool_script_path not in script_contents
+                                ):
                                     with open(abs_tool_path, "r") as tf:
                                         script_contents[tool_script_path] = tf.read()
-                                    self.console.print(f"[dim]Loaded tool dependency: {tool_script_path}[/dim]")
-                                    
+                                    self.console.print(
+                                        f"[dim]Loaded tool dependency: {tool_script_path}[/dim]"
+                                    )
+
                                     # Also load utility modules for this Python script
-                                    if tool_script_path.endswith('.py'):
+                                    if tool_script_path.endswith(".py"):
                                         tools_dir = abs_tool_path.parent
-                                        utility_modules = ['amd_smi_utils.py', 'rocm_smi_utils.py', 'pynvml_utils.py']
+                                        utility_modules = [
+                                            "amd_smi_utils.py",
+                                            "rocm_smi_utils.py",
+                                            "pynvml_utils.py",
+                                        ]
                                         for util_file in utility_modules:
                                             util_path = tools_dir / util_file
                                             if util_path.exists():
-                                                util_rel_path = f"scripts/common/tools/{util_file}"
+                                                util_rel_path = (
+                                                    f"scripts/common/tools/{util_file}"
+                                                )
                                                 if util_rel_path not in script_contents:
                                                     with open(util_path, "r") as uf:
-                                                        script_contents[util_rel_path] = uf.read()
-                                                    self.console.print(f"[dim]Loaded utility module (from dependency): {util_rel_path}[/dim]")
+                                                        script_contents[
+                                                            util_rel_path
+                                                        ] = uf.read()
+                                                    self.console.print(
+                                                        f"[dim]Loaded utility module (from dependency): {util_rel_path}[/dim]"
+                                                    )
 
     def _bundle_primus_k8s_examples_overlay(
         self, model_scripts_contents: Dict[str, str], model_name: str = ""
@@ -644,7 +727,9 @@ def _add_primus_file(host_file: Path) -> bool:
         req = primus_repo / "requirements.txt"
         if req.is_file():
             if _add_primus_file(req):
-                self.console.print("[dim]Primus K8s: bundled Primus/requirements.txt[/dim]")
+                self.console.print(
+                    "[dim]Primus K8s: bundled Primus/requirements.txt[/dim]"
+                )
 
         ex_scripts = primus_repo / "examples" / "scripts"
         if ex_scripts.is_dir():
@@ -661,7 +746,9 @@ def _add_primus_file(host_file: Path) -> bool:
         run_pre = primus_repo / "examples" / "run_pretrain.sh"
         if run_pre.is_file():
             if _add_primus_file(run_pre):
-                self.console.print("[dim]Primus K8s: bundled Primus/examples/run_pretrain.sh[/dim]")
+                self.console.print(
+                    "[dim]Primus K8s: bundled Primus/examples/run_pretrain.sh[/dim]"
+                )
 
         for sub in subdirs:
             base = primus_repo / "examples" / sub
@@ -709,7 +796,7 @@ def _prepare_template_context(
         if credential_path.exists():
             with open(credential_path, "r") as f:
                 credential_content = f.read()
-        
+
         # Load data.json content if exists
         data_json_content = None
         data_path = Path("data.json")
@@ -720,17 +807,19 @@ def _prepare_template_context(
 
         # Load model scripts directory content (entire folder, not just one file)
         # This matches local execution which mounts the entire MODEL_DIR/scripts folder
-        model_script_path = model_info.get("scripts")  # e.g., "scripts/dummy/run_data_minio.sh"
+        model_script_path = model_info.get(
+            "scripts"
+        )  # e.g., "scripts/dummy/run_data_minio.sh"
         model_script_dir = None
         model_script_filename = None
         model_scripts_contents = {}  # Store all scripts in the directory
-        
+
         if model_script_path:
             script_file = Path(model_script_path)
             # Extract directory and filename
             model_script_dir = str(script_file.parent)  # e.g., "scripts/dummy"
-            model_script_filename = script_file.name     # e.g., "run_data_minio.sh"
-            
+            model_script_filename = script_file.name  # e.g., "run_data_minio.sh"
+
             # Bundle entire scripts/<model> directory recursively for reliability across
             # different model types (vllm, sglang, etc.) with varying file types and subdirs
             scripts_dir_path = Path(model_script_dir)
@@ -758,19 +847,23 @@ def _prepare_template_context(
                 # Fallback: load single file if directory doesn't exist
                 with open(script_file, "r") as f:
                     model_scripts_contents[model_script_path] = f.read()
-                self.console.print(f"[dim]Loaded single script: {model_script_path}[/dim]")
+                self.console.print(
+                    f"[dim]Loaded single script: {model_script_path}[/dim]"
+                )
             else:
-                self.console.print(f"[yellow]Warning: Script not found: {model_script_path}[/yellow]")
-        
+                self.console.print(
+                    f"[yellow]Warning: Script not found: {model_script_path}[/yellow]"
+                )
+
         # Load K8s tools configuration
         k8s_tools_config = self._load_k8s_tools()
-        
+
         # Prepare data configuration first
         data_config = self._prepare_data_config(model_info)
-        
+
         # Store for use in deploy() method
         self._data_config = data_config
-        
+
         # K8s best practice: Auto-create shared data PVC if needed
         # K8s philosophy: Separate compute (pods) from storage (PVC)
         if data_config and not self.k8s_config.get("data_pvc"):
@@ -790,7 +883,7 @@ def _prepare_template_context(
             )
             # Set PVC name now so templates are rendered with correct value
             self.k8s_config["data_pvc"] = "madengine-shared-data"
-        
+
         # Determine data provider script if model needs data
         data_provider_script = None
         data_provider_script_content = None
@@ -798,16 +891,20 @@ def _prepare_template_context(
             provider_type = data_config.get("provider_type", "local")
             if provider_type in k8s_tools_config.get("data_providers", {}):
                 data_provider_script = k8s_tools_config["data_providers"][provider_type]
-                
+
                 # Load K8s data provider script content
                 k8s_script_path = get_madengine_root() / data_provider_script["script"]
                 if k8s_script_path.exists():
                     with open(k8s_script_path, "r") as f:
                         data_provider_script_content = f.read()
-                    self.console.print(f"[dim]Loaded K8s data provider: {data_provider_script['script']}[/dim]")
+                    self.console.print(
+                        f"[dim]Loaded K8s data provider: {data_provider_script['script']}[/dim]"
+                    )
                 else:
-                    self.console.print(f"[yellow]Warning: K8s script not found: {k8s_script_path}[/yellow]")
-        
+                    self.console.print(
+                        f"[yellow]Warning: K8s script not found: {k8s_script_path}[/yellow]"
+                    )
+
         # Get launcher configuration from manifest's deployment_config or additional_context
         deployment_config = self.manifest.get("deployment_config", {})
         distributed_config = deployment_config.get("distributed", {})
@@ -816,86 +913,130 @@ def _prepare_template_context(
         # Merge manifest and runtime launcher config (runtime overrides)
         # Use explicit None checking to handle 0 values correctly
         launcher_type = (
-            launcher_config.get("type") 
-            if launcher_config.get("type") is not None 
+            launcher_config.get("type")
+            if launcher_config.get("type") is not None
             else distributed_config.get("launcher")
         )
-        
+
         nnodes = (
             launcher_config.get("nnodes")
             if launcher_config.get("nnodes") is not None
             else distributed_config.get("nnodes", 1)
         )
-        
+
         # Store for use in deploy() method
         self._nnodes = nnodes
-        
+
         nproc_per_node = (
             launcher_config.get("nproc_per_node")
             if launcher_config.get("nproc_per_node") is not None
-            else distributed_config.get("nproc_per_node")
-            if distributed_config.get("nproc_per_node") is not None
-            else int(model_info.get("n_gpus", 1))
+            else (
+                distributed_config.get("nproc_per_node")
+                if distributed_config.get("nproc_per_node") is not None
+                else int(model_info.get("n_gpus", 1))
+            )
         )
-        
+
         master_port = launcher_config.get("master_port", 29500)
 
         # Validate configuration
         if launcher_type == "torchrun":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring torchrun: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
-        
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring torchrun: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
+
         elif launcher_type == "deepspeed":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring DeepSpeed: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring DeepSpeed: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
 
         elif launcher_type == "torchtitan":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring TorchTitan: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring TorchTitan: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
 
         elif launcher_type == "vllm":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring vLLM: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring vLLM: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
 
         elif launcher_type == "sglang":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring SGLang: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring SGLang: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
 
         elif launcher_type == "megatron":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring Megatron-LM: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring Megatron-LM: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
 
         elif launcher_type == "primus":
             if not isinstance(nnodes, int) or nnodes < 1:
-                raise ValueError(f"Invalid nnodes: {nnodes}. Must be positive integer >= 1")
+                raise ValueError(
+                    f"Invalid nnodes: {nnodes}. Must be positive integer >= 1"
+                )
             if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-                raise ValueError(f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1")
-            
-            self.console.print(f"[cyan]Configuring Primus: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]")
+                raise ValueError(
+                    f"Invalid nproc_per_node: {nproc_per_node}. Must be positive integer >= 1"
+                )
+
+            self.console.print(
+                f"[cyan]Configuring Primus: {nnodes} nodes × {nproc_per_node} GPUs/node[/cyan]"
+            )
             self._bundle_primus_k8s_examples_overlay(model_scripts_contents, model_name)
 
         # Determine if we need multi-node setup
@@ -905,32 +1046,38 @@ def _prepare_template_context(
         if launcher_type == "torchrun":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node detected: Creating headless service for pod discovery[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node detected: Creating headless service for pod discovery[/dim]"
+                )
+
             # Generate torchrun launcher command
             launcher_command = self._generate_torchrun_command(
                 nnodes=nnodes,
                 nproc_per_node=nproc_per_node,
                 master_port=master_port,
-                model_script=model_info.get("scripts", "run.sh")
+                model_script=model_info.get("scripts", "run.sh"),
             )
-        
+
         elif launcher_type == "deepspeed":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node DeepSpeed: Creating headless service for pod discovery[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node DeepSpeed: Creating headless service for pod discovery[/dim]"
+                )
+
             model_script = model_info.get("scripts", "run.sh")
-            
+
             # Check if script is a bash script - if so, execute it directly
             # as it will handle the launcher internally
-            if model_script.endswith('.sh'):
-                self.console.print(f"[dim]Detected bash script ({model_script}), will execute directly[/dim]")
+            if model_script.endswith(".sh"):
+                self.console.print(
+                    f"[dim]Detected bash script ({model_script}), will execute directly[/dim]"
+                )
                 launcher_command = self._generate_bash_script_command(
                     nnodes=nnodes,
                     nproc_per_node=nproc_per_node,
                     master_port=master_port,
-                    model_script=model_script
+                    model_script=model_script,
                 )
             else:
                 # Python script - use DeepSpeed launcher
@@ -938,27 +1085,31 @@ def _prepare_template_context(
                     nnodes=nnodes,
                     nproc_per_node=nproc_per_node,
                     master_port=master_port,
-                    model_script=model_script
+                    model_script=model_script,
                 )
 
         elif launcher_type == "torchtitan":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node TorchTitan: Creating headless service for pod discovery[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node TorchTitan: Creating headless service for pod discovery[/dim]"
+                )
+
             # Generate TorchTitan launcher command
             launcher_command = self._generate_torchtitan_command(
                 nnodes=nnodes,
                 nproc_per_node=nproc_per_node,
                 master_port=master_port,
-                model_script=model_info.get("scripts", "run.sh")
+                model_script=model_info.get("scripts", "run.sh"),
             )
 
         elif launcher_type == "vllm":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node vLLM: Creating headless service for Ray cluster[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node vLLM: Creating headless service for Ray cluster[/dim]"
+                )
+
             # Generate vLLM launcher command (pass model args so run.sh gets --model_repo etc.)
             launcher_command = self._generate_vllm_command(
                 nnodes=nnodes,
@@ -971,8 +1122,10 @@ def _prepare_template_context(
         elif launcher_type == "sglang":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node SGLang: Creating headless service for Ray cluster[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node SGLang: Creating headless service for Ray cluster[/dim]"
+                )
+
             # Generate SGLang launcher command (pass model args so run.sh gets CLI args)
             launcher_command = self._generate_sglang_command(
                 nnodes=nnodes,
@@ -988,38 +1141,46 @@ def _prepare_template_context(
                     f"SGLang Disaggregated requires minimum 3 nodes "
                     f"(1 proxy + 1 prefill + 1 decode), got {nnodes}"
                 )
-            
+
             # Always create headless service for disaggregated architecture
             create_headless_service = True
-            self.console.print(f"[dim]SGLang Disaggregated: Creating headless service for {nnodes} pods[/dim]")
-            self.console.print(f"[dim]  Architecture: 1 proxy + {max(1, (nnodes-1)*2//5)} prefill + {nnodes-1-max(1, (nnodes-1)*2//5)} decode[/dim]")
-            
+            self.console.print(
+                f"[dim]SGLang Disaggregated: Creating headless service for {nnodes} pods[/dim]"
+            )
+            self.console.print(
+                f"[dim]  Architecture: 1 proxy + {max(1, (nnodes-1)*2//5)} prefill + {nnodes-1-max(1, (nnodes-1)*2//5)} decode[/dim]"
+            )
+
             # Generate SGLang Disaggregated launcher command
             launcher_command = self._generate_sglang_disagg_command(
                 nnodes=nnodes,
                 nproc_per_node=nproc_per_node,
                 master_port=master_port,
-                model_script=model_info.get("scripts", "run.sh")
+                model_script=model_info.get("scripts", "run.sh"),
             )
 
         elif launcher_type == "megatron":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node Megatron-LM: Creating headless service for pod discovery[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node Megatron-LM: Creating headless service for pod discovery[/dim]"
+                )
+
             # Generate Megatron-LM launcher command
             launcher_command = self._generate_megatron_command(
                 nnodes=nnodes,
                 nproc_per_node=nproc_per_node,
                 master_port=master_port,
-                model_script=model_info.get("scripts", "run.sh")
+                model_script=model_info.get("scripts", "run.sh"),
             )
 
         elif launcher_type == "primus":
             if nnodes > 1:
                 create_headless_service = True
-                self.console.print(f"[dim]Multi-node Primus: Creating headless service for pod discovery[/dim]")
-            
+                self.console.print(
+                    f"[dim]Multi-node Primus: Creating headless service for pod discovery[/dim]"
+                )
+
             # Generate Primus launcher command (env-only: PRIMUS_CONFIG_PATH, PRIMUS_CLI_EXTRA)
             launcher_command = self._generate_primus_command(
                 nnodes=nnodes,
@@ -1029,7 +1190,9 @@ def _prepare_template_context(
                 model_args=model_info.get("args", "") or "",
                 model_name=model_info.get("name", "") or "",
             )
-            primus_cfg = merged_primus_config(self.manifest, self.config.additional_context)
+            primus_cfg = merged_primus_config(
+                self.manifest, self.config.additional_context
+            )
             backend_hint = (primus_cfg.get("backend") or "").strip().lower()
             inferred_backend = infer_primus_backend_from_model_name(
                 model_info.get("name", "") or ""
@@ -1050,23 +1213,25 @@ def _prepare_template_context(
         # Prepare pre/post scripts (similar to local execution)
         pre_scripts = []
         post_scripts = []
-        
+
         # Get pre/post scripts from manifest context if available
         if "context" in self.manifest:
             if "pre_scripts" in self.manifest["context"]:
                 pre_scripts.extend(self.manifest["context"]["pre_scripts"])
             if "post_scripts" in self.manifest["context"]:
                 post_scripts.extend(self.manifest["context"]["post_scripts"])
-        
+
         # Add system environment collection (rocEnvTool) - same as local execution
         # This is controlled by generate_sys_env_details flag (default: True)
-        generate_sys_env_details = self.config.additional_context.get("generate_sys_env_details", True)
+        generate_sys_env_details = self.config.additional_context.get(
+            "generate_sys_env_details", True
+        )
         if generate_sys_env_details:
             self.gather_system_env_details(pre_scripts, model_info["name"])
-        
+
         # Add tool pre/post scripts to the execution lists (like local execution)
         self._add_tool_scripts(pre_scripts, post_scripts)
-        
+
         # Load pre/post script contents for ConfigMap (since madengine not installed in container)
         pre_post_script_contents = self._load_common_scripts(pre_scripts + post_scripts)
 
@@ -1122,9 +1287,7 @@ def _prepare_template_context(
             # Job metadata
             "job_name": self.job_name,
             "job_label": self.job_label,
-            "main_container_name": getattr(
-                self, "main_container_name", None
-            )
+            "main_container_name": getattr(self, "main_container_name", None)
             or sanitize_k8s_container_name(self.job_name),
             "namespace": self.namespace,
             "model_name": model_name,
@@ -1169,7 +1332,9 @@ def _prepare_template_context(
             "host_ipc": nnodes > 1,  # Enable for multi-node
             "subdomain": subdomain_val,
             # Execution
-            "gpu_visibility": ",".join(str(i) for i in range(gpu_count)),  # e.g., "0" for 1 GPU, "0,1" for 2 GPUs
+            "gpu_visibility": ",".join(
+                str(i) for i in range(gpu_count)
+            ),  # e.g., "0" for 1 GPU, "0,1" for 2 GPUs
             "gpu_architecture": self.manifest.get("context", {}).get(
                 "gpu_architecture", "gfx90a"
             ),
@@ -1184,7 +1349,7 @@ def _prepare_template_context(
             "env_vars": self._prepare_env_vars(model_info),
             # Volumes
             "results_pvc": f"{self.job_name}-results",  # Always create a PVC for results
-            "pvc_name": f"{self.job_name}-results",      # PVC name for template
+            "pvc_name": f"{self.job_name}-results",  # PVC name for template
             "data_pvc": self.k8s_config.get("data_pvc"),
             # Multi-node
             "create_headless_service": create_headless_service,
@@ -1195,9 +1360,13 @@ def _prepare_template_context(
             # Tools configuration - from manifest.context or additional_context
             "tools_config": self._get_tools_config(),
             # Tool command chains (pre-built for template)
-            "launcher_tool_chain": self._build_tool_command_chain(
-                self._get_tools_config(), "bash /tmp/run_launcher.sh"
-            ) if launcher_command else None,
+            "launcher_tool_chain": (
+                self._build_tool_command_chain(
+                    self._get_tools_config(), "bash /tmp/run_launcher.sh"
+                )
+                if launcher_command
+                else None
+            ),
             "direct_script_tool_chain": self._build_tool_command_chain(
                 self._get_tools_config(), f"bash {model_info.get('scripts', 'run.sh')}"
             ),
@@ -1218,98 +1387,101 @@ def _prepare_template_context(
             )
 
         return context
-    
+
     def _get_tools_config(self) -> List[Dict]:
         """
         Get tools configuration from manifest.context or additional_context.
-        
+
         Prioritizes runtime additional_context, falls back to manifest.context.
-        
+
         For multi-node runs:
         - Checks rocprofv3 availability (required for MPI profiling)
         - Upgrades "rocprof" to "rocprofv3" for multi-node compatibility
         - Logs warnings if rocprofv3 not available
-        
+
         Returns:
             List of tool configurations (enriched with cmd from tools.json)
         """
         # Cache the result to avoid repeated expensive checks and duplicate warnings
-        if hasattr(self, '_cached_tools_config'):
+        if hasattr(self, "_cached_tools_config"):
             return self._cached_tools_config
-        
+
         # Check runtime additional_context first (allows runtime override)
         tools = self.config.additional_context.get("tools", [])
-        
+
         # Fall back to manifest.context if no runtime tools
         if not tools and "context" in self.manifest:
             tools = self.manifest["context"].get("tools", [])
-        
+
         # Apply multi-node profiling logic if applicable
         distributed_config = self.config.additional_context.get("distributed", {})
         nnodes = distributed_config.get("nnodes", 1)
-        
+
         if nnodes > 1 and tools:
             # Configure multi-node profiling (handles rocprofv3 detection and tool upgrades)
             # Create a simple logger wrapper for configure_multi_node_profiling
             class ConsoleLogger:
                 def __init__(self, console):
                     self.console = console
+
                 def info(self, msg):
                     self.console.print(f"[cyan]{msg}[/cyan]")
+
                 def warning(self, msg):
                     self.console.print(f"[yellow]{msg}[/yellow]")
+
                 def debug(self, msg):
                     pass  # Skip debug messages in console
-            
+
             profiling_config = configure_multi_node_profiling(
-                nnodes=nnodes,
-                tools_config=tools,
-                logger=ConsoleLogger(self.console)
+                nnodes=nnodes, tools_config=tools, logger=ConsoleLogger(self.console)
             )
-            
+
             if profiling_config["enabled"]:
                 tools = profiling_config["tools"]
             else:
                 # rocprofv3 not available - skip profiling for multi-node
                 tools = []
-        
+
         # Enrich tools with cmd from tools.json for K8s template usage
         result = self._enrich_tools_with_cmd(tools)
-        
+
         # Cache the result for subsequent calls
         self._cached_tools_config = result
         return result
-    
-    def _build_tool_command_chain(self, tools_config: List[Dict], base_command: str) -> str:
+
+    def _build_tool_command_chain(
+        self, tools_config: List[Dict], base_command: str
+    ) -> str:
         """
         Build a command chain from multiple tools, wrapping the base command.
-        
+
         Tools are chained from outermost to innermost:
         tool_n wraps tool_2 wraps tool_1 wraps base_command
-        
+
         Each tool's OUTPUT_FILE env var is set inline to avoid conflicts.
-        
+
         Args:
             tools_config: List of enriched tool configurations
             base_command: The base command to wrap (e.g., "bash /tmp/run_launcher.sh")
-            
+
         Returns:
             Complete command chain string
         """
         if not tools_config:
             return base_command
-        
+
         # Filter tools that have a cmd field
         tools_with_cmd = [t for t in tools_config if t.get("cmd")]
-        
+
         if not tools_with_cmd:
             return base_command
-        
+
         # Build command chain from inside out (reverse order)
         cmd_chain = base_command
         for tool in reversed(tools_with_cmd):
             tool_cmd = tool["cmd"].replace("../scripts/common/", "scripts/common/")
-            
+
             # Set OUTPUT_FILE inline for this specific tool (if defined in tool's env_vars)
             tool_env_vars = tool.get("env_vars", {})
             if "OUTPUT_FILE" in tool_env_vars:
@@ -1318,196 +1490,224 @@ def _build_tool_command_chain(self, tools_config: List[Dict], base_command: str)
                 cmd_chain = f"OUTPUT_FILE={output_file} {tool_cmd} {cmd_chain}"
             else:
                 cmd_chain = f"{tool_cmd} {cmd_chain}"
-        
+
         return cmd_chain
-    
+
     def _enrich_tools_with_cmd(self, tools: List[Dict]) -> List[Dict]:
         """
         Enrich tools configuration with cmd field from tools.json.
-        
+
         This is needed for K8s template to generate the correct encapsulation command.
-        
+
         Args:
             tools: List of tool configurations (may only have 'name' field)
-            
+
         Returns:
             Enriched list with 'cmd' field added from tools.json
         """
         if not tools:
             return tools
-        
+
         # Load tools.json
-        tools_json_path = Path(__file__).parent.parent / "scripts" / "common" / "tools.json"
+        tools_json_path = (
+            Path(__file__).parent.parent / "scripts" / "common" / "tools.json"
+        )
         if not tools_json_path.exists():
-            self.console.print(f"[yellow]Warning: tools.json not found at {tools_json_path}[/yellow]")
+            self.console.print(
+                f"[yellow]Warning: tools.json not found at {tools_json_path}[/yellow]"
+            )
             return tools
-        
+
         with open(tools_json_path, "r") as f:
             tools_definitions = json.load(f)
-        
+
         enriched_tools = []
         for tool in tools:
             tool_name = tool.get("name")
             if not tool_name:
                 enriched_tools.append(tool)
                 continue
-            
+
             # Get tool definition from tools.json
             if tool_name not in tools_definitions.get("tools", {}):
-                self.console.print(f"[yellow]Warning: Tool '{tool_name}' not found in tools.json[/yellow]")
+                self.console.print(
+                    f"[yellow]Warning: Tool '{tool_name}' not found in tools.json[/yellow]"
+                )
                 enriched_tools.append(tool)
                 continue
-            
+
             tool_def = tools_definitions["tools"][tool_name]
-            
+
             # Create enriched tool config with cmd
             enriched_tool = tool.copy()
             if "cmd" not in enriched_tool and "cmd" in tool_def:
                 enriched_tool["cmd"] = tool_def["cmd"]
-            
+
             # Also copy env_vars if present
             if "env_vars" not in enriched_tool and "env_vars" in tool_def:
                 enriched_tool["env_vars"] = tool_def["env_vars"]
-            
+
             enriched_tools.append(enriched_tool)
-        
+
         return enriched_tools
 
     def _load_k8s_tools(self) -> Dict:
         """
         Load K8s-specific tools configuration.
-        
+
         Returns:
             Dict with K8s tools configuration
         """
         k8s_tools_file = Path(__file__).parent.parent / "scripts" / "k8s" / "tools.json"
-        
+
         if k8s_tools_file.exists():
             try:
                 with open(k8s_tools_file, "r") as f:
                     return json.load(f)
             except Exception as e:
-                self.console.print(f"[yellow]Warning: Failed to load K8s tools config: {e}[/yellow]")
+                self.console.print(
+                    f"[yellow]Warning: Failed to load K8s tools config: {e}[/yellow]"
+                )
                 return {}
         else:
-            self.console.print(f"[yellow]Warning: K8s tools.json not found at {k8s_tools_file}[/yellow]")
+            self.console.print(
+                f"[yellow]Warning: K8s tools.json not found at {k8s_tools_file}[/yellow]"
+            )
             return {}
-    
+
     def _prepare_env_vars(self, model_info: Dict) -> Dict[str, str]:
         """
         Prepare environment variables from multiple sources.
-        
+
         Merges env vars from:
         1. Base additional_context
         2. Data provider
         3. Tools configuration
-        
+
         Args:
             model_info: Model configuration
-            
+
         Returns:
             Merged environment variables dict
         """
         env_vars = {}
-        
+
         # 1. Base environment variables from additional_context
         base_env = self.config.additional_context.get("env_vars", {})
         env_vars.update(base_env)
-        
+
         # 1b. Critical ROCm environment variable (if not already set)
         # HSA_NO_SCRATCH_RECLAIM=1 required for AMD MI300X and newer GPUs
         # Prevents performance degradation and NCCL errors
         if "HSA_NO_SCRATCH_RECLAIM" not in env_vars:
             env_vars["HSA_NO_SCRATCH_RECLAIM"] = "1"
-        
+
         # 2. Data provider environment variables
         data_config = self._prepare_data_config(model_info)
         if data_config:
             if "env_vars" in data_config:
                 # Exclude MAD_DATAHOME from data provider's env vars (we set it explicitly below for K8s)
-                data_provider_env = {k: v for k, v in data_config["env_vars"].items() if k != "MAD_DATAHOME"}
+                data_provider_env = {
+                    k: v
+                    for k, v in data_config["env_vars"].items()
+                    if k != "MAD_DATAHOME"
+                }
                 env_vars.update(data_provider_env)
             # Always set MAD_DATAHOME for K8s (PVC mount point /data, not /data_dlm_0)
             if "datahome" in data_config:
                 env_vars["MAD_DATAHOME"] = data_config["datahome"]
-        
+
         # 3. Tools configuration environment variables
         # Check both additional_context and manifest.context for tools
         tools_config = self.config.additional_context.get("tools", [])
         if not tools_config and "context" in self.manifest:
             tools_config = self.manifest["context"].get("tools", [])
-        
+
         for tool in tools_config:
             if "env_vars" in tool:
                 # Skip OUTPUT_FILE as it's set inline in command chain to avoid conflicts
-                tool_env_vars = {k: v for k, v in tool["env_vars"].items() if k != "OUTPUT_FILE"}
+                tool_env_vars = {
+                    k: v for k, v in tool["env_vars"].items() if k != "OUTPUT_FILE"
+                }
                 env_vars.update(tool_env_vars)
-        
+
         return env_vars
-    
+
     def _prepare_data_config(self, model_info: Dict) -> Optional[Dict]:
         """
         Prepare data provider configuration for K8s pod.
-        
+
         Args:
             model_info: Model configuration
-            
+
         Returns:
             Data configuration dict or None
         """
         if "data" not in model_info or not model_info["data"]:
             return None
-        
+
         # Initialize data provider if needed
         if not self.data:
             try:
                 # Create minimal context for data provider
                 # We only need the data.json file to be present
                 import os
+
                 data_json_file = "data.json"
                 if os.path.exists(data_json_file):
                     # Import Context and create minimal instance
                     # Data provider needs this to function
-                    self.context_for_data = type('obj', (object,), {
-                        'ctx': {},
-                        'sh': lambda cmd: os.popen(cmd).read().strip()
-                    })()
+                    self.context_for_data = type(
+                        "obj",
+                        (object,),
+                        {"ctx": {}, "sh": lambda cmd: os.popen(cmd).read().strip()},
+                    )()
                     self.data = Data(
                         self.context_for_data,
                         filename=data_json_file,
-                        force_mirrorlocal=False
+                        force_mirrorlocal=False,
                     )
                 else:
-                    self.console.print("[yellow]Warning: data.json not found, data provider unavailable[/yellow]")
+                    self.console.print(
+                        "[yellow]Warning: data.json not found, data provider unavailable[/yellow]"
+                    )
                     return None
             except Exception as e:
-                self.console.print(f"[yellow]Warning: Could not initialize data provider: {e}[/yellow]")
+                self.console.print(
+                    f"[yellow]Warning: Could not initialize data provider: {e}[/yellow]"
+                )
                 return None
-        
+
         try:
             # Get data environment variables
             data_env = self.data.get_env(model_info["data"])
-            
+
             # Find data provider for this data
             dp = self.data.find_dataprovider(model_info["data"])
             if not dp:
-                self.console.print(f"[yellow]Warning: Data provider not found for {model_info['data']}[/yellow]")
+                self.console.print(
+                    f"[yellow]Warning: Data provider not found for {model_info['data']}[/yellow]"
+                )
                 return None
-            
+
             # Get provider type and source path
-            provider_type = dp.provider_type if hasattr(dp, 'provider_type') else "local"
-            source_url = dp.config.get("path", "") if hasattr(dp, 'config') else ""
-            
+            provider_type = (
+                dp.provider_type if hasattr(dp, "provider_type") else "local"
+            )
+            source_url = dp.config.get("path", "") if hasattr(dp, "config") else ""
+
             # K8s best practice: Always use /data (PVC mount point)
             # PVC provides persistent, shared storage across all pods/nodes
             # Separation of storage (PVC) from compute (pods) is K8s standard
             # FORCE datahome to /data for K8s (override data provider's default /data_dlm_0)
-            
+
             # Filter out MAD_DATAHOME from data provider env vars (will be set explicitly below)
-            filtered_data_env = {k: v for k, v in (data_env or {}).items() if k != "MAD_DATAHOME"}
+            filtered_data_env = {
+                k: v for k, v in (data_env or {}).items() if k != "MAD_DATAHOME"
+            }
             # Add MAD_DATAHOME with correct K8s value
             filtered_data_env["MAD_DATAHOME"] = "/data"
-            
+
             return {
                 "data_name": model_info["data"],
                 "env_vars": filtered_data_env,
@@ -1516,7 +1716,9 @@ def _prepare_data_config(self, model_info: Dict) -> Optional[Dict]:
                 "datahome": "/data",  # Always use PVC mount point for K8s
             }
         except Exception as e:
-            self.console.print(f"[yellow]Warning: Could not prepare data config: {e}[/yellow]")
+            self.console.print(
+                f"[yellow]Warning: Could not prepare data config: {e}[/yellow]"
+            )
             return None
 
     def _save_debug_manifests(self):
@@ -1534,9 +1736,7 @@ def _save_debug_manifests(self):
         if self.service_yaml:
             (output_dir / "service.yaml").write_text(self.service_yaml)
 
-        self.console.print(
-            f"[yellow]Debug: Manifests saved to {output_dir}[/yellow]"
-        )
+        self.console.print(f"[yellow]Debug: Manifests saved to {output_dir}[/yellow]")
 
     def _k8s_data_storage_class(self) -> Optional[str]:
         """StorageClass for long-lived ``madengine-shared-data`` (NFS RWX recommended)."""
@@ -1598,7 +1798,7 @@ def _create_results_pvc(self, nnodes: int = 1) -> str:
             storage_size=self.k8s_config.get("results_storage_size", "10Gi"),
             storage_class=storage_class,
         )
-        
+
         # Create PVC (retry on 409 "object is being deleted" until it is gone)
         pvc_dict = yaml.safe_load(pvc_yaml)
         max_create_retries = 6
@@ -1610,7 +1810,11 @@ def _create_results_pvc(self, nnodes: int = 1) -> str:
                 )
                 return pvc_name
             except ApiException as e:
-                if e.status == 409 and e.body and "object is being deleted" in (e.body or ""):
+                if (
+                    e.status == 409
+                    and e.body
+                    and "object is being deleted" in (e.body or "")
+                ):
                     if attempt < max_create_retries - 1:
                         self.console.print(
                             f"[dim]PVC still terminating, waiting {create_wait_seconds}s before retry ({attempt + 1}/{max_create_retries})[/dim]"
@@ -1620,7 +1824,7 @@ def _create_results_pvc(self, nnodes: int = 1) -> str:
                         raise
                 else:
                     raise
-    
+
     def _wait_for_pvc_deleted(self, pvc_name: str, max_wait: int = 90) -> None:
         """Block until the PVC is fully removed (or timeout)."""
         for i in range(max_wait):
@@ -1744,7 +1948,7 @@ def _create_or_get_data_pvc(self, nnodes: int = 1) -> str:
             )
 
         return pvc_name
-    
+
     def _cleanup_existing_resources(self):
         """Delete existing Job, ConfigMap, and Service if they exist."""
         # Delete existing Job
@@ -1752,7 +1956,7 @@ def _cleanup_existing_resources(self):
             self.batch_v1.delete_namespaced_job(
                 name=self.job_name,
                 namespace=self.namespace,
-                propagation_policy="Background"
+                propagation_policy="Background",
             )
             self.console.print(f"[dim]Deleted existing Job: {self.job_name}[/dim]")
         except ApiException as e:
@@ -1766,62 +1970,64 @@ def _cleanup_existing_resources(self):
             )
         except ApiException:
             pass
-        
+
         # Delete existing ConfigMap
         try:
             self.core_v1.delete_namespaced_config_map(
-                name=self.configmap_name,
-                namespace=self.namespace
+                name=self.configmap_name, namespace=self.namespace
+            )
+            self.console.print(
+                f"[dim]Deleted existing ConfigMap: {self.configmap_name}[/dim]"
             )
-            self.console.print(f"[dim]Deleted existing ConfigMap: {self.configmap_name}[/dim]")
         except ApiException as e:
             if e.status != 404:
                 pass
-        
+
         # Delete existing Service
-        if hasattr(self, 'service_yaml') and self.service_yaml:
+        if hasattr(self, "service_yaml") and self.service_yaml:
             try:
                 self.core_v1.delete_namespaced_service(
-                    name=self.service_name,
-                    namespace=self.namespace
+                    name=self.service_name, namespace=self.namespace
+                )
+                self.console.print(
+                    f"[dim]Deleted existing Service: {self.service_name}[/dim]"
                 )
-                self.console.print(f"[dim]Deleted existing Service: {self.service_name}[/dim]")
             except ApiException as e:
                 if e.status != 404:
                     pass
-        
+
         # Delete existing collector pod (must be done before PVC to allow PVC deletion)
         collector_pod_name = f"collector-{self.job_name}"
         try:
             self.core_v1.delete_namespaced_pod(
                 name=collector_pod_name,
                 namespace=self.namespace,
-                grace_period_seconds=0
+                grace_period_seconds=0,
+            )
+            self.console.print(
+                f"[dim]Deleted existing collector pod: {collector_pod_name}[/dim]"
             )
-            self.console.print(f"[dim]Deleted existing collector pod: {collector_pod_name}[/dim]")
             # Wait a moment for pod to release the PVC
             time.sleep(2)
         except ApiException as e:
             if e.status != 404:
                 pass
-        
+
         # Delete existing PVC
         pvc_name = f"{self.job_name}-results"
         try:
             self.core_v1.delete_namespaced_persistent_volume_claim(
-                name=pvc_name,
-                namespace=self.namespace
+                name=pvc_name, namespace=self.namespace
             )
             self.console.print(f"[dim]Deleted existing PVC: {pvc_name}[/dim]")
-            
+
             # Wait for PVC to be fully deleted (not just marked for deletion)
             max_wait = 90  # Maximum 90 seconds (PV can take time to detach)
             wait_interval = 1  # Check every 1 second
             for i in range(max_wait):
                 try:
                     self.core_v1.read_namespaced_persistent_volume_claim(
-                        name=pvc_name,
-                        namespace=self.namespace
+                        name=pvc_name, namespace=self.namespace
                     )
                     if i > 0 and i % 10 == 0:
                         self.console.print(
@@ -1835,7 +2041,7 @@ def _cleanup_existing_resources(self):
         except ApiException as e:
             if e.status != 404:
                 pass
-        
+
         # Wait a moment for other resources to be deleted
         time.sleep(1)
 
@@ -1844,22 +2050,22 @@ def deploy(self) -> DeploymentResult:
         try:
             # Clean up any existing resources first
             self._cleanup_existing_resources()
-            
+
             # 1. Create PVC for results storage
             self.console.print("[blue]Creating PVC for results storage...[/blue]")
             nnodes_deploy = getattr(self, "_nnodes", 1)
             pvc_name = self._create_results_pvc(nnodes=nnodes_deploy)
             self.console.print(f"[green]✓ Created PVC: {pvc_name}[/green]")
-            
+
             # 1b. Create or reuse data PVC if data provider is configured and auto-creation was flagged
-            if hasattr(self, '_data_config') and self._data_config:
+            if hasattr(self, "_data_config") and self._data_config:
                 # Check if we set the PVC name during prepare (auto-creation case)
                 data_pvc_name = self.k8s_config.get("data_pvc")
                 if data_pvc_name == "madengine-shared-data":
                     # Auto-creation mode: create/reuse the PVC
-                    nnodes = getattr(self, '_nnodes', 1)
+                    nnodes = getattr(self, "_nnodes", 1)
                     self._create_or_get_data_pvc(nnodes=nnodes)
-            
+
             # 2. Create Secrets from local credential.json (strategy: from_local_credentials)
             merged_sec = merge_secrets_config(self.k8s_config)
             strategy = merged_sec.get("strategy", SECRETS_STRATEGY_FROM_LOCAL)
@@ -1890,7 +2096,9 @@ def deploy(self) -> DeploymentResult:
                 self.core_v1.create_namespaced_service(
                     namespace=self.namespace, body=service_dict
                 )
-                self.console.print(f"[green]✓ Created Service: {self.service_name}[/green]")
+                self.console.print(
+                    f"[green]✓ Created Service: {self.service_name}[/green]"
+                )
 
             # 5. Create Job
             self.console.print("[blue]Creating Job...[/blue]")
@@ -1928,18 +2136,18 @@ def deploy(self) -> DeploymentResult:
     def monitor(self, deployment_id: str) -> DeploymentResult:
         """
         Monitor Job status using Python API.
-        
+
         If live_output is enabled, streams pod logs in real-time.
         Otherwise, polls status periodically.
         """
         # Check if live output is requested
         live_output = self.config.additional_context.get("live_output", False)
-        
+
         if live_output:
             return self._monitor_with_live_logs(deployment_id)
         else:
             return self._monitor_status_only(deployment_id)
-    
+
     def _monitor_status_only(self, deployment_id: str) -> DeploymentResult:
         """Monitor Job status without streaming logs."""
         try:
@@ -1985,21 +2193,23 @@ def _monitor_status_only(self, deployment_id: str) -> DeploymentResult:
                     message=f"Job {deployment_id} not found",
                 )
             raise
-    
+
     def _monitor_with_live_logs(self, deployment_id: str) -> DeploymentResult:
         """Monitor Job and stream logs in real-time."""
-        self.console.print(f"\n[cyan]═══ Streaming pod logs (--live-output) ═══[/cyan]\n")
-        
+        self.console.print(
+            f"\n[cyan]═══ Streaming pod logs (--live-output) ═══[/cyan]\n"
+        )
+
         pod_name = None
         log_position = 0
-        
+
         while True:
             try:
                 # Check job status
                 job = self.batch_v1.read_namespaced_job_status(
                     name=deployment_id, namespace=self.namespace
                 )
-                
+
                 # Get pod if we don't have it yet
                 if not pod_name:
                     pods = self.core_v1.list_namespaced_pod(
@@ -2008,8 +2218,10 @@ def _monitor_with_live_logs(self, deployment_id: str) -> DeploymentResult:
                     )
                     if pods.items:
                         pod_name = pods.items[0].metadata.name
-                        self.console.print(f"[dim]Following logs from pod: {pod_name}[/dim]\n")
-                
+                        self.console.print(
+                            f"[dim]Following logs from pod: {pod_name}[/dim]\n"
+                        )
+
                 # Stream logs if we have a pod
                 if pod_name:
                     try:
@@ -2017,31 +2229,33 @@ def _monitor_with_live_logs(self, deployment_id: str) -> DeploymentResult:
                         logs = self.core_v1.read_namespaced_pod_log(
                             name=pod_name,
                             namespace=self.namespace,
-                            tail_lines=100 if log_position == 0 else None
+                            tail_lines=100 if log_position == 0 else None,
                         )
-                        
+
                         # Print new log lines and trigger artifact collection
                         if logs:
-                            log_lines = logs.split('\n')
+                            log_lines = logs.split("\n")
                             if len(log_lines) > log_position:
                                 for line in log_lines[log_position:]:
                                     if line.strip():
                                         print(line)
                                 log_position = len(log_lines)
-                    
+
                     except ApiException as e:
                         if e.status != 400:  # Ignore "container not ready" errors
                             pass
-                
+
                 # Check if job completed
                 if job.status.succeeded:
-                    self.console.print(f"\n[green]✓ Job {deployment_id} completed successfully[/green]\n")
+                    self.console.print(
+                        f"\n[green]✓ Job {deployment_id} completed successfully[/green]\n"
+                    )
                     return DeploymentResult(
                         status=DeploymentStatus.SUCCESS,
                         deployment_id=deployment_id,
                         message=f"Job {deployment_id} completed successfully",
                     )
-                
+
                 if job.status.failed:
                     self.console.print(f"\n[red]✗ Job {deployment_id} failed[/red]\n")
                     # Print final logs
@@ -2052,9 +2266,9 @@ def _monitor_with_live_logs(self, deployment_id: str) -> DeploymentResult:
                         deployment_id=deployment_id,
                         message=f"Job {deployment_id} failed",
                     )
-                
+
                 time.sleep(2)  # Poll every 2 seconds
-                
+
             except ApiException as e:
                 if e.status == 404:
                     return DeploymentResult(
@@ -2063,24 +2277,22 @@ def _monitor_with_live_logs(self, deployment_id: str) -> DeploymentResult:
                         message=f"Job {deployment_id} not found",
                     )
                 raise
-    
+
     def _print_pod_logs_on_failure(self, deployment_id: str):
         """Print pod logs when job fails (for debugging)."""
         try:
             self.console.print(f"\n[yellow]═══ Pod logs (last 50 lines) ═══[/yellow]\n")
-            
+
             pods = self.core_v1.list_namespaced_pod(
                 namespace=self.namespace,
                 label_selector=_pod_job_name_label_selector(deployment_id),
             )
-            
+
             for pod in pods.items:
                 pod_name = pod.metadata.name
                 try:
                     logs = self.core_v1.read_namespaced_pod_log(
-                        name=pod_name,
-                        namespace=self.namespace,
-                        tail_lines=50
+                        name=pod_name, namespace=self.namespace, tail_lines=50
                     )
                     self.console.print(f"[dim]Pod: {pod_name}[/dim]")
                     print(logs)
@@ -2135,17 +2347,17 @@ def _refresh_pod_until_terminal_phase(
     def collect_results(self, deployment_id: str) -> Dict[str, Any]:
         """
         Enhanced results collection from K8s pods following vLLM multi-node best practices.
-        
+
         For Data Parallel deployments (vLLM, SGLang):
         - Each pod runs an independent replica
         - Only pod-0 reports metrics to avoid duplicates
         - Total throughput = pod-0 throughput × num_replicas
-        
+
         Collects:
         1. Pod logs (``k8s_results/<job>/<pod>/pod.log``)
         2. PVC mirror per pod (``.../<pod>/pvc/``), mapped from ``/results/<subdir>/``
         3. File artifacts via kubectl cp when pods are still running (keep-alive path)
-        
+
         Returns:
             Dict with logs, artifacts, and performance results
         """
@@ -2161,8 +2373,10 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
         # Create results directory for this deployment
         results_dir = Path(f"./k8s_results/{deployment_id}")
         results_dir.mkdir(parents=True, exist_ok=True)
-        
-        self.console.print(f"[cyan]📦 Collecting results from K8s job: {deployment_id}[/cyan]")
+
+        self.console.print(
+            f"[cyan]📦 Collecting results from K8s job: {deployment_id}[/cyan]"
+        )
 
         try:
             # Get pods for this job
@@ -2178,7 +2392,7 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                 model_info = self.manifest["built_models"][model_key]
             else:
                 model_info = {}
-            
+
             # Get build info from built_images
             image_keys = list(self.manifest.get("built_images", {}).keys())
             if image_keys:
@@ -2193,21 +2407,21 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
             is_distributed = distributed_config.get("enabled", False)
             nnodes = distributed_config.get("nnodes", 1)
             is_multinode = is_distributed and nnodes > 1
-            
+
             # Determine launcher_type the same way as _prepare_template_context does
             # (deployment_config doesn't store launcher_type directly)
             launcher_config = self.config.additional_context.get("launcher", {})
             launcher_type = (
-                launcher_config.get("type") 
-                if launcher_config.get("type") is not None 
+                launcher_config.get("type")
+                if launcher_config.get("type") is not None
                 else distributed_config.get("launcher")
             )
-            
+
             # Normalize launcher based on deployment type and validity
             launcher_type = normalize_launcher(launcher_type, "kubernetes")
-            
+
             is_ray_launcher = launcher_type in ["vllm", "sglang"]
-            
+
             # Sort pods by name to ensure consistent ordering (pod-0 is master)
             sorted_pods = sorted(pods.items, key=lambda p: p.metadata.name)
 
@@ -2217,31 +2431,33 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
             # Parse performance from ALL nodes (each reports node-local metrics)
             # Aggregate metrics based on type (sum for throughput, etc.)
             # ========================================================================
-            
+
             per_node_metrics = []  # Store performance from each node
             results["nodes"] = []  # Store per-node details for display
-            
+
             # Special handling for Ray-based launchers (vLLM, SGLang)
             # These report per-replica metrics, need scaling
             if is_multinode and is_ray_launcher:
                 self.console.print(
                     f"[cyan]Multi-node Ray deployment: {nnodes} nodes (Data Parallel mode)[/cyan]"
                 )
-            
+
             # Collect from ALL pods
             for pod_index, pod in enumerate(sorted_pods):
                 pod_name = pod.metadata.name
                 pod_dir = results_dir / pod_name
                 pod_dir.mkdir(exist_ok=True)
-                
+
                 # Extract node rank from pod name (e.g., madengine-dummy-torchrun-0 -> 0)
                 try:
-                    node_rank = int(pod_name.rsplit('-', 1)[-1])
+                    node_rank = int(pod_name.rsplit("-", 1)[-1])
                 except (ValueError, IndexError):
                     node_rank = pod_index
-                
-                self.console.print(f"[dim]  Collecting from pod: {pod_name} (node-{node_rank})[/dim]")
-                
+
+                self.console.print(
+                    f"[dim]  Collecting from pod: {pod_name} (node-{node_rank})[/dim]"
+                )
+
                 try:
                     # 1. Collect pod logs
                     log = self.core_v1.read_namespaced_pod_log(
@@ -2249,37 +2465,41 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     )
                     log_file = pod_dir / "pod.log"
                     log_file.write_text(log)
-                    results["logs"].append({
-                        "pod": pod_name,
-                        "log": log,
-                        "file": str(log_file)
-                    })
-                    
+                    results["logs"].append(
+                        {"pod": pod_name, "log": log, "file": str(log_file)}
+                    )
+
                     # 2. Parse NODE-LOCAL performance from log
                     perf_data = self._parse_performance_from_log(
                         log, model_info.get("name", "")
                     )
-                    
+
                     # Pod phase/exit can lag right after Job success; poll until terminal or timeout
                     pod = self._refresh_pod_until_terminal_phase(pod_name)
                     pod_status = pod.status.phase if pod else "Unknown"
                     pod_exit_code = (
                         self._primary_workload_container_exit_code(pod) if pod else -1
                     )
-                    
+
                     # Store per-node info for display table
                     node_info = {
                         "node_id": node_rank,
                         "pod_name": pod_name,
-                        "status": "SUCCESS" if pod_status == "Succeeded" and pod_exit_code == 0 else "FAILED",
+                        "status": (
+                            "SUCCESS"
+                            if pod_status == "Succeeded" and pod_exit_code == 0
+                            else "FAILED"
+                        ),
                         "exit_code": pod_exit_code,
-                        "performance": perf_data.get("performance") if perf_data else None,
+                        "performance": (
+                            perf_data.get("performance") if perf_data else None
+                        ),
                         "metric": perf_data.get("metric") if perf_data else None,
                         "duration": perf_data.get("duration") if perf_data else None,
-                        "log_file": str(log_file)
+                        "log_file": str(log_file),
                     }
                     results["nodes"].append(node_info)
-                    
+
                     if perf_data:
                         # For Ray launchers, this is per-replica metric
                         if is_multinode and is_ray_launcher:
@@ -2293,42 +2513,48 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                         self.console.print(
                             f"[dim]  No performance metric found in node-{node_rank} log[/dim]"
                         )
-                        
+
                 except ApiException as e:
                     self.console.print(
                         f"[red]✗ Failed to get logs for pod {pod_name}: {e.reason}[/red]"
                     )
-                    results["nodes"].append({
-                        "node_id": node_rank,
-                        "pod_name": pod_name,
-                        "status": "FAILED",
-                        "exit_code": -1,
-                        "performance": None,
-                        "metric": None,
-                        "error": f"Failed to get logs: {e.reason}"
-                    })
+                    results["nodes"].append(
+                        {
+                            "node_id": node_rank,
+                            "pod_name": pod_name,
+                            "status": "FAILED",
+                            "exit_code": -1,
+                            "performance": None,
+                            "metric": None,
+                            "error": f"Failed to get logs: {e.reason}",
+                        }
+                    )
                 except Exception as e:
                     self.console.print(
                         f"[red]✗ Error collecting from pod {pod_name}: {e}[/red]"
                     )
-                    results["nodes"].append({
-                        "node_id": node_rank,
-                        "pod_name": pod_name,
-                        "status": "FAILED",
-                        "exit_code": -1,
-                        "performance": None,
-                        "metric": None,
-                        "error": str(e)
-                    })
-            
+                    results["nodes"].append(
+                        {
+                            "node_id": node_rank,
+                            "pod_name": pod_name,
+                            "status": "FAILED",
+                            "exit_code": -1,
+                            "performance": None,
+                            "metric": None,
+                            "error": str(e),
+                        }
+                    )
+
             self.console.print(
                 f"[green]✓ Collected logs from {len(results['logs'])} pods[/green]"
             )
-            
+
             # Collect artifacts from PVC before deciding success/failure (needed for multiple_results fallback)
             k8s_pod_names = [p.metadata.name for p in sorted_pods]
-            self._collect_from_pvc(deployment_id, results_dir, results, pod_names=k8s_pod_names)
-            
+            self._collect_from_pvc(
+                deployment_id, results_dir, results, pod_names=k8s_pod_names
+            )
+
             # ========================================================================
             # Aggregate per-node metrics
             # ========================================================================
@@ -2343,7 +2569,7 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     self.console.print(
                         f"[green]  Total capacity: {aggregated_perf:.1f} req/s ({nnodes} nodes)[/green]"
                     )
-                    
+
                     # Create aggregated record manually for Ray
                     aggregated_record = {
                         "model": per_node_metrics[0]["model"],
@@ -2354,21 +2580,23 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                         "nnodes": nnodes,
                         "launcher": launcher_type or "N/A",
                         "deployment_type": "kubernetes",
-                        "gpu_architecture": per_node_metrics[0].get("gpu_architecture", "N/A"),
+                        "gpu_architecture": per_node_metrics[0].get(
+                            "gpu_architecture", "N/A"
+                        ),
                         "duration": per_node_metrics[0].get("duration", "N/A"),
                         "data_name": per_node_metrics[0].get("data_name", "N/A"),
-                        "data_provider": per_node_metrics[0].get("data_provider", "N/A"),
+                        "data_provider": per_node_metrics[0].get(
+                            "data_provider", "N/A"
+                        ),
                         "aggregation_method": "scaled_by_nnodes",
-                        "nodes_contributing": nnodes
+                        "nodes_contributing": nnodes,
                     }
                 else:
                     # Use new aggregation logic for other launchers
                     aggregated_record = self._aggregate_node_metrics(
-                        per_node_metrics, 
-                        nnodes,
-                        launcher_type
+                        per_node_metrics, nnodes, launcher_type
                     )
-                
+
                 if aggregated_record:
                     # Full reporting pipeline: perf_entry at project root, then update_* (same as local/SLURM)
                     self._ensure_perf_csv_exists()
@@ -2379,9 +2607,13 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     with open(perf_entry_path, "w", encoding="utf-8") as f:
                         json.dump(run_details_dict, f, indent=2)
                     if run_details_dict.get("status") == "SUCCESS":
-                        update_perf_csv(perf_csv="perf.csv", single_result=str(perf_entry_path))
+                        update_perf_csv(
+                            perf_csv="perf.csv", single_result=str(perf_entry_path)
+                        )
                     else:
-                        update_perf_csv(perf_csv="perf.csv", exception_result=str(perf_entry_path))
+                        update_perf_csv(
+                            perf_csv="perf.csv", exception_result=str(perf_entry_path)
+                        )
                     scripts_path = model_info.get("scripts", "")
                     scripts_base_dir = scripts_base_dir_from(scripts_path)
                     try:
@@ -2403,13 +2635,17 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                             num_entries=num_entries,
                         )
                     except Exception as e:
-                        self.console.print(f"[yellow]⚠ Could not update perf_super: {e}[/yellow]")
-                    results["successful_runs"].append({
-                        "model": model_info.get("name"),
-                        "perf_data": aggregated_record,
-                        "nodes": results["nodes"],
-                        "per_node_metrics": per_node_metrics
-                    })
+                        self.console.print(
+                            f"[yellow]⚠ Could not update perf_super: {e}[/yellow]"
+                        )
+                    results["successful_runs"].append(
+                        {
+                            "model": model_info.get("name"),
+                            "perf_data": aggregated_record,
+                            "nodes": results["nodes"],
+                            "per_node_metrics": per_node_metrics,
+                        }
+                    )
                     self.console.print(
                         f"[green]✓ Aggregated performance from {len(per_node_metrics)} nodes[/green]"
                     )
@@ -2427,6 +2663,7 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     gpu_arch = "N/A"
                     if results.get("logs"):
                         import re
+
                         log_content = results["logs"][0].get("log", "")
                         m = re.search(r"(?:🔹\s*)?Name\s*:\s*(gfx\w+)", log_content)
                         if m:
@@ -2460,15 +2697,20 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     )
                     # Build successful_runs for display (one entry per CSV row)
                     import csv as _csv
+
                     model_name = model_info.get("name", "")
-                    with open(resolved_csv_path, "r", encoding="utf-8", errors="ignore") as f:
+                    with open(
+                        resolved_csv_path, "r", encoding="utf-8", errors="ignore"
+                    ) as f:
                         reader = _csv.DictReader(f)
                         for row in reader:
                             row = {k.strip(): v for k, v in row.items() if k}
                             if row.get("performance") and row.get("metric"):
                                 display_model = f"{model_name}_{row.get('model', '')}"
                                 record = self._create_multiple_result_row_record(
-                                    model_info, build_info, deployment_id,
+                                    model_info,
+                                    build_info,
+                                    deployment_id,
                                     {
                                         "model": display_model,
                                         "performance": row.get("performance"),
@@ -2478,12 +2720,22 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                                     },
                                 )
                                 if record:
-                                    results["successful_runs"].append({
-                                        "model": display_model,
-                                        "perf_data": record,
-                                        "nodes": [],
-                                        "per_node_metrics": [{"model": display_model, "performance": row.get("performance"), "metric": row.get("metric", "")}],
-                                    })
+                                    results["successful_runs"].append(
+                                        {
+                                            "model": display_model,
+                                            "perf_data": record,
+                                            "nodes": [],
+                                            "per_node_metrics": [
+                                                {
+                                                    "model": display_model,
+                                                    "performance": row.get(
+                                                        "performance"
+                                                    ),
+                                                    "metric": row.get("metric", ""),
+                                                }
+                                            ],
+                                        }
+                                    )
                     self.console.print(
                         f"[green]✓ Updated perf.csv, perf_entry.*, perf_super.* (Docker-compatible)[/green]"
                     )
@@ -2499,12 +2751,14 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                             )
                             if record:
                                 self._write_to_perf_csv(record)
-                                results["successful_runs"].append({
-                                    "model": item["model"],
-                                    "perf_data": record,
-                                    "nodes": [],
-                                    "per_node_metrics": [item],
-                                })
+                                results["successful_runs"].append(
+                                    {
+                                        "model": item["model"],
+                                        "perf_data": record,
+                                        "nodes": [],
+                                        "per_node_metrics": [item],
+                                    }
+                                )
                         self.console.print(
                             f"[green]✓ Wrote {len(fallback_metrics)} row(s) from multiple_results to perf.csv[/green]"
                         )
@@ -2515,30 +2769,38 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                         model_info, build_info, deployment_id, error_msg
                     )
                     self._write_to_perf_csv(failure_record)
-                    results["failed_runs"].append({
-                        "model": model_info.get("name", "Unknown"),
-                        "error": error_msg,
-                        "nodes": results["nodes"]
-                    })
+                    results["failed_runs"].append(
+                        {
+                            "model": model_info.get("name", "Unknown"),
+                            "error": error_msg,
+                            "nodes": results["nodes"],
+                        }
+                    )
                     self.console.print(
                         f"[yellow]⚠ No performance metrics found, recorded as FAILED[/yellow]"
                     )
-                elif resolved_csv_path and not REPORTING_AVAILABLE and not results.get("successful_runs"):
+                elif (
+                    resolved_csv_path
+                    and not REPORTING_AVAILABLE
+                    and not results.get("successful_runs")
+                ):
                     # Legacy path ran but produced no valid rows
                     error_msg = "No performance metrics found from any node"
                     failure_record = self._create_failure_record(
                         model_info, build_info, deployment_id, error_msg
                     )
                     self._write_to_perf_csv(failure_record)
-                    results["failed_runs"].append({
-                        "model": model_info.get("name", "Unknown"),
-                        "error": error_msg,
-                        "nodes": results["nodes"]
-                    })
+                    results["failed_runs"].append(
+                        {
+                            "model": model_info.get("name", "Unknown"),
+                            "error": error_msg,
+                            "nodes": results["nodes"],
+                        }
+                    )
                     self.console.print(
                         f"[yellow]⚠ No performance metrics found, recorded as FAILED[/yellow]"
                     )
-            
+
             # 4. Generate summary
             self._generate_results_summary(results, results_dir)
 
@@ -2546,7 +2808,7 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
             self.console.print(f"[yellow]⚠ Results collection incomplete: {e}[/yellow]")
 
         return results
-    
+
     def _collect_artifacts_immediately(self, deployment_id: str, pod_name: str) -> None:
         """
         Collect artifacts immediately from a running pod during the sleep period.
@@ -2556,41 +2818,45 @@ def _collect_artifacts_immediately(self, deployment_id: str, pod_name: str) -> N
             # Create results directory
             results_dir = Path("k8s_results") / deployment_id
             results_dir.mkdir(parents=True, exist_ok=True)
-            
+
             pod_dir = results_dir / pod_name
             pod_dir.mkdir(exist_ok=True)
-            
+
             # Collect artifacts
             artifacts = self._collect_pod_artifacts(pod_name, pod_dir)
-            
+
             if artifacts:
-                self.console.print(f"[green]✓ Collected {len(artifacts)} artifacts from {pod_name}[/green]")
+                self.console.print(
+                    f"[green]✓ Collected {len(artifacts)} artifacts from {pod_name}[/green]"
+                )
             else:
-                self.console.print(f"[yellow]⚠ No artifacts collected from {pod_name}[/yellow]")
-                
+                self.console.print(
+                    f"[yellow]⚠ No artifacts collected from {pod_name}[/yellow]"
+                )
+
         except Exception as e:
             self.console.print(f"[yellow]⚠ Error collecting artifacts: {e}[/yellow]")
-    
+
     def _collect_pod_artifacts(self, pod_name: str, dest_dir: Path) -> List[Dict]:
         """
         Collect file artifacts from pod using kubectl cp.
-        
+
         Collects:
         - perf.csv (performance results)
         - *_env.csv (environment details from rocEnvTool)
         - profiling outputs (rocprof*, results*, *.db)
         - tracing outputs (*_output/ directories)
         - tool-specific outputs
-        
+
         Args:
             pod_name: Name of the Kubernetes pod
             dest_dir: Local directory to save artifacts
-            
+
         Returns:
             List of collected artifact metadata
         """
         artifacts = []
-        
+
         # Define artifact patterns to collect
         artifact_patterns = [
             {"pattern": "perf.csv", "type": "performance"},
@@ -2598,55 +2864,69 @@ def _collect_pod_artifacts(self, pod_name: str, dest_dir: Path) -> List[Dict]:
             {"pattern": "results*", "type": "profiling"},
             {"pattern": "*.db", "type": "profiling"},
             {"pattern": "trace.*", "type": "tracing"},
-            {"pattern": "prof.csv", "type": "profiling"},  # Raw profiler output before post-script renames it
+            {
+                "pattern": "prof.csv",
+                "type": "profiling",
+            },  # Raw profiler output before post-script renames it
             {"pattern": "gpu_info_*.csv", "type": "profiling"},
             {"pattern": "library_trace.csv", "type": "tracing"},
         ]
-        
+
         for artifact_def in artifact_patterns:
             pattern = artifact_def["pattern"]
             artifact_type = artifact_def["type"]
-            
+
             try:
                 # Try direct kubectl cp without exec (works during the sleep period)
                 # For patterns with wildcards, try common specific filenames
-                if '*' in pattern:
+                if "*" in pattern:
                     # Expand pattern to specific known files
                     if pattern == "*_env.csv":
-                        specific_files = ["dummy_prof_env.csv", "dummy_data_minio_env.csv"]
+                        specific_files = [
+                            "dummy_prof_env.csv",
+                            "dummy_data_minio_env.csv",
+                        ]
                     elif pattern == "gpu_info_*.csv":
-                        specific_files = ["gpu_info_power_profiler_output.csv", "gpu_info_vram_profiler_output.csv"]
+                        specific_files = [
+                            "gpu_info_power_profiler_output.csv",
+                            "gpu_info_vram_profiler_output.csv",
+                        ]
                     elif pattern == "results*":
                         specific_files = ["results.csv", "results.txt", "results.json"]
                     elif pattern == "trace.*":
                         specific_files = ["trace.txt", "trace.csv", "trace.json"]
                     else:
                         specific_files = []
-                    
+
                     for filename in specific_files:
                         local_path = dest_dir / filename
                         cp_cmd = [
-                            "kubectl", "cp",
+                            "kubectl",
+                            "cp",
                             f"{self.namespace}/{pod_name}:/workspace/{filename}",
-                            str(local_path)
+                            str(local_path),
                         ]
-                        
+
                         cp_result = subprocess.run(
                             cp_cmd, capture_output=True, text=True, timeout=30
                         )
-                        
+
                         if cp_result.returncode == 0 and local_path.exists():
-                            artifacts.append({
-                                "pod": pod_name,
-                                "type": artifact_type,
-                                "source": f"/workspace/{filename}",
-                                "local_path": str(local_path),
-                                "size": local_path.stat().st_size
-                            })
+                            artifacts.append(
+                                {
+                                    "pod": pod_name,
+                                    "type": artifact_type,
+                                    "source": f"/workspace/{filename}",
+                                    "local_path": str(local_path),
+                                    "size": local_path.stat().st_size,
+                                }
+                            )
                             self.console.print(
                                 f"[dim]    ✓ Collected {artifact_type}: {filename}[/dim]"
                             )
-                        elif cp_result.stderr and "No such file" not in cp_result.stderr:
+                        elif (
+                            cp_result.stderr and "No such file" not in cp_result.stderr
+                        ):
                             # Log unexpected errors (but not "file not found")
                             self.console.print(
                                 f"[yellow]    ⚠ Failed to collect {filename}: {cp_result.stderr.strip()}[/yellow]"
@@ -2655,23 +2935,26 @@ def _collect_pod_artifacts(self, pod_name: str, dest_dir: Path) -> List[Dict]:
                     # Direct file - try to copy it
                     local_path = dest_dir / pattern
                     cp_cmd = [
-                        "kubectl", "cp",
+                        "kubectl",
+                        "cp",
                         f"{self.namespace}/{pod_name}:/workspace/{pattern}",
-                        str(local_path)
+                        str(local_path),
                     ]
-                    
+
                     cp_result = subprocess.run(
                         cp_cmd, capture_output=True, text=True, timeout=30
                     )
-                    
+
                     if cp_result.returncode == 0 and local_path.exists():
-                        artifacts.append({
-                            "pod": pod_name,
-                            "type": artifact_type,
-                            "source": f"/workspace/{pattern}",
-                            "local_path": str(local_path),
-                            "size": local_path.stat().st_size
-                        })
+                        artifacts.append(
+                            {
+                                "pod": pod_name,
+                                "type": artifact_type,
+                                "source": f"/workspace/{pattern}",
+                                "local_path": str(local_path),
+                                "size": local_path.stat().st_size,
+                            }
+                        )
                         self.console.print(
                             f"[dim]    ✓ Collected {artifact_type}: {pattern}[/dim]"
                         )
@@ -2680,48 +2963,55 @@ def _collect_pod_artifacts(self, pod_name: str, dest_dir: Path) -> List[Dict]:
                         self.console.print(
                             f"[yellow]    ⚠ Failed to collect {pattern}: {cp_result.stderr.strip()}[/yellow]"
                         )
-                        
+
             except subprocess.TimeoutExpired:
                 pass  # Timeout - skip this file
             except Exception:
                 pass  # File not found or not accessible - this is expected
-        
+
         # Try to collect known output directories using kubectl cp directly (during sleep period)
         output_directories = ["rocprof_output", "rpd_output", "trace_output"]
         for dir_name in output_directories:
             try:
                 local_dir = dest_dir / dir_name
                 cp_cmd = [
-                    "kubectl", "cp",
+                    "kubectl",
+                    "cp",
                     f"{self.namespace}/{pod_name}:/workspace/{dir_name}",
-                    str(local_dir)
+                    str(local_dir),
                 ]
-                
+
                 cp_result = subprocess.run(
                     cp_cmd, capture_output=True, text=True, timeout=60
                 )
-                
+
                 if cp_result.returncode == 0 and local_dir.exists():
                     # Count files in directory
-                    file_count = sum(1 for _ in local_dir.rglob('*') if _.is_file())
+                    file_count = sum(1 for _ in local_dir.rglob("*") if _.is_file())
                     if file_count > 0:
-                        total_size = sum(f.stat().st_size for f in local_dir.rglob('*') if f.is_file())
-                        artifacts.append({
-                            "pod": pod_name,
-                            "type": "tool_output_directory",
-                            "source": f"/workspace/{dir_name}",
-                            "local_path": str(local_dir),
-                            "file_count": file_count,
-                            "size": total_size
-                        })
+                        total_size = sum(
+                            f.stat().st_size
+                            for f in local_dir.rglob("*")
+                            if f.is_file()
+                        )
+                        artifacts.append(
+                            {
+                                "pod": pod_name,
+                                "type": "tool_output_directory",
+                                "source": f"/workspace/{dir_name}",
+                                "local_path": str(local_dir),
+                                "file_count": file_count,
+                                "size": total_size,
+                            }
+                        )
                         self.console.print(
                             f"[dim]    ✓ Collected directory: {dir_name} ({file_count} files, {total_size} bytes)[/dim]"
                         )
             except Exception:
                 pass  # Directory not found - this is expected
-        
+
         return artifacts
-    
+
     def _collect_from_pvc(
         self,
         deployment_id: str,
@@ -2748,22 +3038,31 @@ def _collect_from_pvc(
             pod_names: Full Kubernetes pod names for this job (ordered)
         """
         pvc_name = f"{deployment_id}-results"
-        
+
         try:
             # Create a temporary pod to access PVC
             collector_pod_name = f"collector-{deployment_id[:15]}"
-            
-            self.console.print(f"[dim]📦 Collecting artifacts from PVC: {pvc_name}[/dim]")
-            
+
+            self.console.print(
+                f"[dim]📦 Collecting artifacts from PVC: {pvc_name}[/dim]"
+            )
+
             collector_spec: Dict[str, Any] = {
                 "restartPolicy": "Never",
-                "containers": [{
-                    "name": "collector",
-                    "image": "busybox:latest",
-                    "command": ["sh", "-c", "sleep 600"],
-                    "volumeMounts": [{"name": "results", "mountPath": "/results"}]
-                }],
-                "volumes": [{"name": "results", "persistentVolumeClaim": {"claimName": pvc_name}}]
+                "containers": [
+                    {
+                        "name": "collector",
+                        "image": "busybox:latest",
+                        "command": ["sh", "-c", "sleep 600"],
+                        "volumeMounts": [{"name": "results", "mountPath": "/results"}],
+                    }
+                ],
+                "volumes": [
+                    {
+                        "name": "results",
+                        "persistentVolumeClaim": {"claimName": pvc_name},
+                    }
+                ],
             }
             ips = getattr(self, "_image_pull_secrets_for_pods", None) or []
             if ips:
@@ -2785,10 +3084,10 @@ def _collect_from_pvc(
             except ApiException as e:
                 if e.status != 404:  # 404 means pod doesn't exist, which is fine
                     pass
-            
+
             # Create collector pod
             self.core_v1.create_namespaced_pod(self.namespace, collector_pod_spec)
-            
+
             # Wait for pod to be ready
             for _ in range(30):  # Wait up to 30 seconds
                 try:
@@ -2800,7 +3099,9 @@ def _collect_from_pvc(
                 except ApiException as e:
                     # Pod not found yet or not ready - this is expected during startup
                     if e.status != 404:
-                        self.console.print(f"[dim]Waiting for collector pod (status: {e.status})...[/dim]")
+                        self.console.print(
+                            f"[dim]Waiting for collector pod (status: {e.status})...[/dim]"
+                        )
                 time.sleep(1)
             else:
                 raise Exception("Collector pod did not start in time")
@@ -2874,11 +3175,15 @@ def _collect_from_pvc(
                         str(local_pod_dir),
                     ]
 
-                    cp_result = subprocess.run(cp_cmd, capture_output=True, text=True, timeout=60)
+                    cp_result = subprocess.run(
+                        cp_cmd, capture_output=True, text=True, timeout=60
+                    )
 
                     if cp_result.returncode == 0:
                         # Count collected files
-                        file_count = sum(1 for _ in local_pod_dir.rglob('*') if _.is_file())
+                        file_count = sum(
+                            1 for _ in local_pod_dir.rglob("*") if _.is_file()
+                        )
                         if file_count > 0:
                             art: Dict[str, Any] = {
                                 "source": f"PVC:{pvc_name}/{pod_dir_name}",
@@ -2899,7 +3204,7 @@ def _collect_from_pvc(
                             self.console.print(
                                 f"[dim]    ✓ Collected {file_count} files from {pod_dir_name} → {dest_hint}[/dim]"
                             )
-                
+
                 self.console.print(f"[green]✓ Collected artifacts from PVC[/green]")
             else:
                 hint = ""
@@ -2916,19 +3221,19 @@ def _collect_from_pvc(
                 self.console.print(
                     f"[yellow]⚠ No results found in PVC after retries{hint}[/yellow]"
                 )
-            
+
             # Cleanup collector pod
             self.core_v1.delete_namespaced_pod(
                 collector_pod_name, self.namespace, grace_period_seconds=0
             )
-            
+
         except Exception as e:
             self.console.print(f"[yellow]⚠ Could not collect from PVC: {e}[/yellow]")
-    
+
     def _generate_results_summary(self, results: Dict, results_dir: Path):
         """
         Generate a summary JSON of all collected artifacts.
-        
+
         Args:
             results: Results dict with logs and artifacts
             results_dir: Directory where results are saved
@@ -2950,44 +3255,49 @@ def _generate_results_summary(self, results: Dict, results_dir: Path):
             "successful_runs": len(results["successful_runs"]),
             "failed_runs": len(results["failed_runs"]),
         }
-        
+
         # Group artifacts by type
         for artifact in results["artifacts"]:
             artifact_type = artifact.get("type", "unknown")
-            summary["artifacts_by_type"][artifact_type] = summary["artifacts_by_type"].get(artifact_type, 0) + 1
-        
+            summary["artifacts_by_type"][artifact_type] = (
+                summary["artifacts_by_type"].get(artifact_type, 0) + 1
+            )
+
         summary_file = results_dir / "results_summary.json"
         summary_file.write_text(json.dumps(summary, indent=2))
-        
+
         self.console.print(f"[green]✓ Results summary: {summary_file}[/green]")
-        
+
         # Print summary table if artifacts were collected
         if summary["artifacts_by_type"]:
             from rich.table import Table
+
             table = Table(title="Collected Artifacts")
             table.add_column("Type", style="cyan")
             table.add_column("Count", justify="right", style="green")
-            
+
             for artifact_type, count in sorted(summary["artifacts_by_type"].items()):
                 table.add_row(artifact_type, str(count))
-            
+
             self.console.print(table)
-    
-    def _create_failure_record(self, model_info: Dict, build_info: Dict, pod_name: str, error_msg: str) -> Dict:
+
+    def _create_failure_record(
+        self, model_info: Dict, build_info: Dict, pod_name: str, error_msg: str
+    ) -> Dict:
         """
         Create a failure record for perf.csv when performance metrics are missing.
-        
+
         Args:
             model_info: Model information from manifest
             build_info: Build information from manifest
             pod_name: Kubernetes pod name
             error_msg: Error message describing the failure
-            
+
         Returns:
             Dict with all perf.csv fields marked as FAILED
         """
         import os
-        
+
         # Get topology information for failure record
         deployment_config = self.manifest.get("deployment_config", {})
         distributed_config = deployment_config.get("distributed", {})
@@ -2997,7 +3307,7 @@ def _create_failure_record(self, model_info: Dict, build_info: Dict, pod_name: s
             nproc_per_node = int(model_info.get("n_gpus", 1))
         # Launcher: use distributed.launcher when set, otherwise "native" for k8s
         launcher = normalize_launcher(distributed_config.get("launcher"), "kubernetes")
-        
+
         # Create a record with the same structure as successful runs
         # but with performance=0, metric="", and status="FAILED"
         result = {
@@ -3006,45 +3316,40 @@ def _create_failure_record(self, model_info: Dict, build_info: Dict, pod_name: s
             "n_gpus": str(nnodes * nproc_per_node),
             "nnodes": str(nnodes),
             "gpus_per_node": str(nproc_per_node),
-            
             # Model configuration
             "training_precision": model_info.get("training_precision", ""),
             "pipeline": get_pipeline(),
             "args": model_info.get("args", ""),
             "tags": model_info.get("tags", ""),
-
             # Build information
             "docker_file": build_info.get("dockerfile", ""),
             "base_docker": build_info.get("base_docker", ""),
             "docker_sha": build_info.get("docker_sha", ""),
             "docker_image": build_info.get("docker_image", ""),
-
             # Runtime information
             "git_commit": "",
             "machine_name": pod_name,
             "deployment_type": "kubernetes",
             "launcher": launcher,
             "gpu_architecture": "",
-
             # Performance metrics - FAILED
             "performance": "0",
             "metric": error_msg,  # Store error message in metric field
             "relative_change": "",
             "status": "FAILURE",  # Use "FAILURE" to match CSV schema
-
             # Timing
             "build_duration": build_info.get("build_duration", ""),
             "test_duration": "",
-
             # Data information
             "dataname": model_info.get("data", ""),
             "data_provider_type": "",
             "data_size": "",
             "data_download_duration": "",
-
             # Build tracking
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags_in_place(result)
         return result
@@ -3082,12 +3387,16 @@ def _build_perf_entry_from_aggregated(
         if nproc_per_node is None:
             nproc_per_node = int(model_info.get("n_gpus", 1))
         launcher = normalize_launcher(distributed_config.get("launcher"), "kubernetes")
-        test_duration = aggregated_record.get("test_duration") or aggregated_record.get("duration", "")
+        test_duration = aggregated_record.get("test_duration") or aggregated_record.get(
+            "duration", ""
+        )
         run_details = {
             "model": model_info.get("name", aggregated_record.get("model", "")),
             "n_gpus": str(aggregated_record.get("n_gpus", nnodes * nproc_per_node)),
             "nnodes": str(aggregated_record.get("nnodes", nnodes)),
-            "gpus_per_node": str(aggregated_record.get("gpus_per_node", nproc_per_node)),
+            "gpus_per_node": str(
+                aggregated_record.get("gpus_per_node", nproc_per_node)
+            ),
             "training_precision": model_info.get("training_precision", ""),
             "pipeline": get_pipeline(),
             "args": model_info.get("args", ""),
@@ -3112,7 +3421,9 @@ def _build_perf_entry_from_aggregated(
             "data_size": "",
             "data_download_duration": "",
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags_in_place(run_details)
         try:
@@ -3174,7 +3485,9 @@ def _build_common_info_dict(
             "data_size": "",
             "data_download_duration": "",
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags_in_place(result)
         return result
@@ -3191,14 +3504,14 @@ def _create_multiple_result_row_record(
         Same shape as _create_failure_record but with SUCCESS and item's performance/metric/model.
         """
         import os
-        
+
         deployment_config = self.manifest.get("deployment_config", {})
         distributed_config = deployment_config.get("distributed", {})
         nnodes = distributed_config.get("nnodes", 1)
         nproc_per_node = distributed_config.get("nproc_per_node")
         if nproc_per_node is None:
             nproc_per_node = int(model_info.get("n_gpus", 1))
-        
+
         # Launcher: use distributed.launcher when set, otherwise "native" for k8s
         launcher = normalize_launcher(distributed_config.get("launcher"), "kubernetes")
         result = {
@@ -3230,11 +3543,13 @@ def _create_multiple_result_row_record(
             "data_size": "",
             "data_download_duration": "",
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags_in_place(result)
         return result
-    
+
     def _parse_multiple_results_from_artifacts(
         self,
         results_dir: Path,
@@ -3246,17 +3561,19 @@ def _parse_multiple_results_from_artifacts(
         Parse performance from a multiple_results CSV (e.g. perf_dummy.csv) collected from PVC.
         Used when the model only writes CSV and does not print 'performance: X Y' to the log
         (same contract as local container_runner multiple_results handling).
-        
+
         Returns:
             List of perf_data dicts (same shape as _parse_node_performance), or empty list.
         """
         import csv as csv_module
+
         multiple_results_file = model_info.get("multiple_results")
         filename = Path(multiple_results_file).name if multiple_results_file else None
         # Try to get gpu_architecture from first pod log
         gpu_arch = "N/A"
         if results.get("logs"):
             import re
+
             log_content = results["logs"][0].get("log", "")
             gpu_arch_match = re.search(r"(?:🔹\s*)?Name\s*:\s*(gfx\w+)", log_content)
             if gpu_arch_match:
@@ -3279,7 +3596,11 @@ def _parse_multiple_results_from_artifacts(
                 with open(csv_path, "r", encoding="utf-8", errors="ignore") as f:
                     reader = csv_module.DictReader(f)
                     reader.fieldnames = [f.strip() for f in (reader.fieldnames or [])]
-                    if not reader.fieldnames or "performance" not in reader.fieldnames or "metric" not in reader.fieldnames:
+                    if (
+                        not reader.fieldnames
+                        or "performance" not in reader.fieldnames
+                        or "metric" not in reader.fieldnames
+                    ):
                         continue
                     for row_idx, row in enumerate(reader):
                         perf_val = row.get("performance", "").strip()
@@ -3293,17 +3614,19 @@ def _parse_multiple_results_from_artifacts(
                         # Same model naming as local handle_multiple_results: model_name + "_" + str(model)
                         row_model = row.get("model", row_idx)
                         display_model = f"{model_info.get('name')}_{row_model}"
-                        parsed_list.append({
-                            "model": display_model,
-                            "performance": perf_float,
-                            "metric": metric_val,
-                            "node_id": row_idx,
-                            "local_gpus": 1,
-                            "duration": "N/A",
-                            "gpu_architecture": gpu_arch,
-                            "data_name": "N/A",
-                            "data_provider": "N/A",
-                        })
+                        parsed_list.append(
+                            {
+                                "model": display_model,
+                                "performance": perf_float,
+                                "metric": metric_val,
+                                "node_id": row_idx,
+                                "local_gpus": 1,
+                                "duration": "N/A",
+                                "gpu_architecture": gpu_arch,
+                                "data_name": "N/A",
+                                "data_provider": "N/A",
+                            }
+                        )
                 if parsed_list:
                     self.console.print(
                         f"[green]  ✓ Parsed performance from {csv_path.name} ({len(parsed_list)} row(s))[/green]"
@@ -3323,21 +3646,43 @@ def _aggregation_for_extra_column(self, column_name: str) -> str:
         """
         col = column_name.lower().strip()
         # Sum: counts, totals, throughput-like
-        if any(k in col for k in [
-            "count", "total", "samples", "tokens", "throughput",
-            "requests", "images", "bandwidth", "ops"
-        ]):
+        if any(
+            k in col
+            for k in [
+                "count",
+                "total",
+                "samples",
+                "tokens",
+                "throughput",
+                "requests",
+                "images",
+                "bandwidth",
+                "ops",
+            ]
+        ):
             return "sum"
         # Average: rates per unit, utilization, ratios
-        if any(k in col for k in [
-            "utilization", "usage", "percent", "ratio", "latency",
-            "time_ms", "ttft", "tpot", "accuracy", "loss"
-        ]):
+        if any(
+            k in col
+            for k in [
+                "utilization",
+                "usage",
+                "percent",
+                "ratio",
+                "latency",
+                "time_ms",
+                "ttft",
+                "tpot",
+                "accuracy",
+                "loss",
+            ]
+        ):
             return "average"
         # Max: duration (slowest node), memory, capacity
-        if any(k in col for k in [
-            "duration", "time", "seconds", "memory", "bytes", "mb", "gb"
-        ]):
+        if any(
+            k in col
+            for k in ["duration", "time", "seconds", "memory", "bytes", "mb", "gb"]
+        ):
             return "max"
         return "first"
 
@@ -3420,7 +3765,11 @@ def _merge_multi_node_multiple_results_csv(
                     continue
                 values = [r.get(col) for r in group]
                 try:
-                    nums = [float(str(v).strip()) for v in values if v is not None and str(v).strip()]
+                    nums = [
+                        float(str(v).strip())
+                        for v in values
+                        if v is not None and str(v).strip()
+                    ]
                 except (ValueError, TypeError):
                     nums = []
                 if nums:
@@ -3441,7 +3790,9 @@ def _merge_multi_node_multiple_results_csv(
             return False
         output_path.parent.mkdir(parents=True, exist_ok=True)
         with open(output_path, "w", newline="", encoding="utf-8") as f:
-            writer = csv_module.DictWriter(f, fieldnames=all_columns, extrasaction="ignore")
+            writer = csv_module.DictWriter(
+                f, fieldnames=all_columns, extrasaction="ignore"
+            )
             writer.writeheader()
             writer.writerows(merged_rows)
         self.console.print(
@@ -3495,7 +3846,9 @@ def cleanup(self, deployment_id: str) -> bool:
             self.console.print(f"[yellow]Deleted K8s Job: {deployment_id}[/yellow]")
         except ApiException as e:
             if e.status != 404:
-                self.console.print(f"[yellow]⚠ Job cleanup warning: {e.reason}[/yellow]")
+                self.console.print(
+                    f"[yellow]⚠ Job cleanup warning: {e.reason}[/yellow]"
+                )
                 success = False
         except Exception as e:
             self.console.print(f"[yellow]⚠ Job cleanup error: {e}[/yellow]")
@@ -3507,9 +3860,7 @@ def cleanup(self, deployment_id: str) -> bool:
             self.core_v1.delete_namespaced_config_map(
                 name=configmap_name, namespace=self.namespace
             )
-            self.console.print(
-                f"[yellow]Deleted ConfigMap: {configmap_name}[/yellow]"
-            )
+            self.console.print(f"[yellow]Deleted ConfigMap: {configmap_name}[/yellow]")
         except ApiException as e:
             if e.status != 404:
                 self.console.print(
@@ -3532,4 +3883,3 @@ def cleanup(self, deployment_id: str) -> bool:
             pass
 
         return success
-
diff --git a/src/madengine/deployment/kubernetes_launcher_mixin.py b/src/madengine/deployment/kubernetes_launcher_mixin.py
index e875f6d2..2b4d517e 100644
--- a/src/madengine/deployment/kubernetes_launcher_mixin.py
+++ b/src/madengine/deployment/kubernetes_launcher_mixin.py
@@ -28,28 +28,28 @@ def _generate_torchrun_command(
     ) -> str:
         """
         Generate torchrun launcher command for K8s Indexed Jobs.
-        
+
         For single-node (nnodes=1), generates standalone torchrun command.
         For multi-node (nnodes>1), generates distributed torchrun with headless
         service DNS for coordination.
-        
+
         Uses K8s environment variables for distributed coordination:
         - JOB_COMPLETION_INDEX: Pod index (0, 1, 2, ...)
         - Headless service DNS for MASTER_ADDR
-        
+
         CRITICAL FIX: For bash scripts that use ${BASH_SOURCE[0]}, we cd into the
         script directory first so relative paths resolve correctly. This fixes the
         issue where profiling tool wrappers prevent BASH_SOURCE from resolving.
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port. Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
-        
+
         Returns:
             Complete torchrun command string
-        
+
         Raises:
             ValueError: If any parameter is invalid
         """
@@ -57,15 +57,19 @@ def _generate_torchrun_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
-        
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
+
         # Check if model_script is a bash script
         # If so, execute it directly as it handles torchrun internally
-        if model_script.endswith('.sh'):
+        if model_script.endswith(".sh"):
             # For bash scripts, set environment variables and execute script
             # The script itself will invoke torchrun with the appropriate Python file
             # CRITICAL: cd to script directory first so BASH_SOURCE[0] resolves correctly
@@ -82,7 +86,7 @@ def _generate_torchrun_command(
 export MAD_MULTI_NODE_RUNNER="torchrun --nnodes={nnodes} --nproc_per_node={nproc_per_node} --node_rank=${{JOB_COMPLETION_INDEX}} --master_addr=${{MASTER_ADDR}} --master_port={master_port}"
 export MAD_RUNTIME_NGPUS={nproc_per_node}
 cd {script_dir} && bash {script_name}"""
-        
+
         # For Python scripts, invoke torchrun directly
         # For single-node, simpler standalone command
         if nnodes == 1:
@@ -91,7 +95,7 @@ def _generate_torchrun_command(
     --nnodes=1 \\
     --nproc_per_node={nproc_per_node} \\
     {model_script}"""
-        
+
         # Multi-node: Use headless service DNS and JOB_COMPLETION_INDEX
         return f"""# Multi-node torchrun setup (Kubernetes Indexed Job)
 export MASTER_ADDR="{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
@@ -118,32 +122,32 @@ def _generate_torchrun_command(
     --role=worker \\
     --tee=3 \\
     {model_script}"""
-    
+
     def _generate_deepspeed_command(
         self, nnodes: int, nproc_per_node: int, master_port: int, model_script: str
     ) -> str:
         """
         Generate DeepSpeed launcher command for K8s Indexed Jobs.
-        
+
         DeepSpeed has its own launcher that handles:
         - ZeRO optimization stages (ZeRO-1, ZeRO-2, ZeRO-3)
         - Gradient accumulation
         - Mixed precision training
         - Pipeline parallelism
         - Hostfile management (handled by K8s in our case)
-        
+
         For single-node (nnodes=1), uses localhost setup.
         For multi-node (nnodes>1), uses headless service DNS for coordination.
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port. Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
-        
+
         Returns:
             Complete DeepSpeed launcher command string
-        
+
         Raises:
             ValueError: If any parameter is invalid
         """
@@ -151,12 +155,16 @@ def _generate_deepspeed_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
-        
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
+
         # For single-node
         if nnodes == 1:
             return f"""# DeepSpeed Single-Node Setup
@@ -176,7 +184,7 @@ def _generate_deepspeed_command(
 deepspeed --num_gpus={nproc_per_node} \\
     --master_port={master_port} \\
     {model_script}"""
-        
+
         # Multi-node: Use K8s headless service for coordination
         return f"""# Multi-node DeepSpeed setup (Kubernetes Indexed Job)
 export MASTER_ADDR="{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
@@ -217,25 +225,25 @@ def _generate_deepspeed_command(
     --num_nodes={nnodes} \\
     --num_gpus={nproc_per_node} \\
     {model_script}"""
-    
+
     def _generate_bash_script_command(
         self, nnodes: int, nproc_per_node: int, master_port: int, model_script: str
     ) -> str:
         """
         Generate command to execute a bash script directly.
-        
+
         This is used when the model script is a .sh file that handles
         launcher invocation internally (e.g., using torchrun inside the script).
-        
+
         Sets up environment variables for distributed training that the bash
         script can use.
-        
+
         Args:
             nnodes: Number of nodes (pods)
             nproc_per_node: GPUs per node
             master_port: Master communication port
             model_script: Path to the bash script
-        
+
         Returns:
             Command to execute the bash script with environment setup
         """
@@ -262,7 +270,7 @@ def _generate_bash_script_command(
 
 # Execute the bash script directly
 bash {model_script}"""
-        
+
         # Multi-node: Use K8s headless service for coordination
         return f"""# Bash Script Execution (Multi-Node)
 # Setting up environment for script to use
@@ -286,47 +294,47 @@ def _generate_bash_script_command(
 
 # Execute the bash script directly
 bash {model_script}"""
-    
+
     def _generate_torchtitan_command(
         self, nnodes: int, nproc_per_node: int, master_port: int, model_script: str
     ) -> str:
         """
         Generate TorchTitan launcher command for K8s Indexed Jobs.
-        
+
         TorchTitan is a PyTorch native platform for large-scale LLM pre-training
         that supports multi-dimensional parallelism:
         - FSDP2 (Fully Sharded Data Parallel v2)
         - Tensor Parallel (TP)
         - Pipeline Parallel (PP)
         - Context Parallel (CP)
-        
+
         TorchTitan uses torchrun as its underlying distributed launcher but
         requires additional configuration for its parallelism strategies.
-        
+
         For single-node (nnodes=1): Uses standalone torchrun with TP
         For multi-node (nnodes>1): Uses distributed torchrun with TP+PP+FSDP2
-        
+
         Uses K8s environment variables for distributed coordination:
         - JOB_COMPLETION_INDEX: Pod index (0, 1, 2, ...)
         - Headless service DNS for MASTER_ADDR
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port. Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
-        
+
         Returns:
             Complete torchtitan launch command string with environment setup
-        
+
         Raises:
             ValueError: If any parameter is invalid
-        
+
         Example single-node output:
             export TORCHTITAN_TENSOR_PARALLEL_SIZE=8
             export TORCHTITAN_PIPELINE_PARALLEL_SIZE=1
             torchrun --standalone --nproc_per_node=8 train.py --config llama3_8b.toml
-        
+
         Example multi-node output:
             export MASTER_ADDR="job-0.job.namespace.svc.cluster.local"
             export TORCHTITAN_TENSOR_PARALLEL_SIZE=8
@@ -338,12 +346,16 @@ def _generate_torchtitan_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
-        
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
+
         # For single-node, use standalone mode with Tensor Parallelism only
         if nnodes == 1:
             return f"""# TorchTitan single-node setup (Tensor Parallelism)
@@ -362,7 +374,7 @@ def _generate_torchtitan_command(
     --nnodes=1 \\
     --nproc_per_node={nproc_per_node} \\
     {model_script}"""
-        
+
         # Multi-node: Use headless service DNS and enable all parallelism strategies
         return f"""# TorchTitan multi-node setup (K8s Indexed Job)
 export MASTER_ADDR="{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
@@ -399,34 +411,34 @@ def _generate_torchtitan_command(
     --role=worker \\
     --tee=3 \\
     {model_script}"""
-    
+
     def _generate_sglang_disagg_command(
         self, nnodes: int, nproc_per_node: int, master_port: int, model_script: str
     ) -> str:
         """
         Generate SGLang Disaggregated launcher command for K8s Indexed Jobs.
-        
+
         SGLang Disaggregated uses separate node pools for:
         - Proxy (index 0): Load balancer and request router
         - Prefill (indices 1 to xP): Prompt processing
         - Decode (indices xP+1 to end): Token generation
-        
+
         Communication via Mooncake framework for efficient KV cache transfer.
-        
+
         Architecture:
         - Pod 0: Runs mini_lb (proxy/load balancer)
         - Pods 1-xP: Run prefill servers
         - Pods xP+1 to N-1: Run decode servers
-        
+
         Args:
             nnodes: Total number of pods (must be >= 3)
             nproc_per_node: GPUs per pod
             master_port: Port for proxy service
             model_script: Path to model launch script
-            
+
         Returns:
             Complete disaggregated launch setup
-            
+
         Raises:
             ValueError: If nnodes < 3 or invalid parameters
         """
@@ -439,12 +451,14 @@ def _generate_sglang_disagg_command(
             raise ValueError(f"nproc_per_node must be >= 1, got {nproc_per_node}")
         if not model_script or not isinstance(model_script, str):
             raise ValueError(f"model_script must be non-empty string")
-        
+
         # Check if custom split is specified in additional_context
-        sglang_disagg_config = self.config.additional_context.get("distributed", {}).get("sglang_disagg", {})
+        sglang_disagg_config = self.config.additional_context.get(
+            "distributed", {}
+        ).get("sglang_disagg", {})
         prefill_nodes = sglang_disagg_config.get("prefill_nodes")
         decode_nodes = sglang_disagg_config.get("decode_nodes")
-        
+
         if prefill_nodes is not None and decode_nodes is not None:
             # User specified custom split - validate
             if prefill_nodes < 1 or decode_nodes < 1:
@@ -464,18 +478,22 @@ def _generate_sglang_disagg_command(
             # Default automatic split (can be customized via additional_context)
             xP = max(1, (nnodes - 1) * 2 // 5)  # ~40% prefill
             yD = nnodes - 1 - xP  # remaining decode
-        
+
         # Build prefill and decode server lists
-        prefill_servers = " ".join([
-            f"http://{self.job_name}-{i}.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local:30000"
-            for i in range(1, xP + 1)
-        ])
-        
-        decode_servers = " ".join([
-            f"http://{self.job_name}-{i}.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local:30000"
-            for i in range(xP + 1, nnodes)
-        ])
-        
+        prefill_servers = " ".join(
+            [
+                f"http://{self.job_name}-{i}.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local:30000"
+                for i in range(1, xP + 1)
+            ]
+        )
+
+        decode_servers = " ".join(
+            [
+                f"http://{self.job_name}-{i}.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local:30000"
+                for i in range(xP + 1, nnodes)
+            ]
+        )
+
         return f"""# SGLang Disaggregated K8s Setup
 # ============================================
 # Cluster: {nnodes} pods total
@@ -508,17 +526,17 @@ def _generate_sglang_disagg_command(
 if [ "$POD_INDEX" -eq 0 ]; then
     # Proxy Node (Load Balancer)
     echo "🔀 This pod is PROXY (Load Balancer)"
-    
+
     python3 -m sglang.srt.disaggregation.mini_lb \\
         --prefill {prefill_servers} \\
         --decode {decode_servers} \\
         --host 0.0.0.0 \\
         --port {master_port}
-    
+
 elif [ "$POD_INDEX" -le "{xP}" ]; then
     # Prefill Nodes
     echo "⚡ This pod is PREFILL Node"
-    
+
     python3 -m sglang.launch_server \\
         --model-path "$MODEL_PATH" \\
         --disaggregation-mode prefill \\
@@ -527,11 +545,11 @@ def _generate_sglang_disagg_command(
         --port 30000 \\
         --trust-remote-code \\
         --disaggregation-transfer-backend mooncake
-    
+
 else
     # Decode Nodes
     echo "🔤 This pod is DECODE Node"
-    
+
     python3 -m sglang.launch_server \\
         --model-path "$MODEL_PATH" \\
         --disaggregation-mode decode \\
@@ -544,7 +562,7 @@ def _generate_sglang_disagg_command(
 
 echo "SGLang Disaggregated setup complete"
 """
-    
+
     def _generate_vllm_command(
         self,
         nnodes: int,
@@ -555,32 +573,32 @@ def _generate_vllm_command(
     ) -> str:
         """
         Generate vLLM launcher command for K8s Indexed Jobs.
-        
+
         vLLM is an inference engine with its own process management via Ray.
         Unlike training frameworks, vLLM doesn't use torchrun.
-        
+
         Architecture:
         - Single-node: Tensor Parallelism (TP) across GPUs, no Ray needed
         - Multi-node: Data Parallelism where each node runs independent vLLM replica
           * Each replica uses TP across its local GPUs
           * Ray coordinates resources on each node independently
           * Benefits: Simpler, more robust, better for inference serving
-        
+
         For K8s multi-node:
         - Each pod runs its own independent vLLM instance
         - Uses Ray for local GPU coordination
         - NO shared Ray cluster across pods (Data Parallelism mode)
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port (for Ray). Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
             model_args: CLI args for the script (e.g. --model_repo openai/gpt-oss-20b).
-        
+
         Returns:
             Complete vLLM launch setup with environment configuration
-        
+
         Raises:
             ValueError: If any parameter is invalid
         """
@@ -588,17 +606,23 @@ def _generate_vllm_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
 
         # Run script from its directory so relative paths (run_vllm.py, configs/) resolve
         script_dir = str(Path(model_script).parent)
         script_name = Path(model_script).name
-        run_cmd = f"cd /workspace/{script_dir} && bash {script_name} {model_args}".strip()
-        
+        run_cmd = (
+            f"cd /workspace/{script_dir} && bash {script_name} {model_args}".strip()
+        )
+
         # For single-node, simple TP setup (no Ray needed)
         if nnodes == 1:
             return f"""# vLLM single-node setup (Tensor Parallelism)
@@ -617,7 +641,7 @@ def _generate_vllm_command(
 
 # vLLM handles process management - run script from its directory so run_vllm.py/configs resolve
 {run_cmd}"""
-        
+
         # Multi-node: Data Parallelism with independent Ray clusters per pod
         return f"""# vLLM multi-node setup (K8s Data Parallelism Mode)
 export MASTER_ADDR="{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
@@ -677,31 +701,31 @@ def _generate_sglang_command(
     ) -> str:
         """
         Generate SGLang launcher command for K8s Indexed Jobs.
-        
+
         SGLang is an inference engine with native launcher (sglang.launch_server).
         Similar to vLLM, it manages its own process spawning via Ray.
-        
+
         Architecture:
         - Single-node: Tensor Parallelism (TP) across GPUs
         - Multi-node: Uses SGLang's native multi-node launcher with Ray
           * TP across GPUs within each node
           * Ray for distributed coordination
-        
+
         For K8s:
         - Uses headless service for node discovery (similar to torchrun)
         - Each pod knows its rank via JOB_COMPLETION_INDEX
         - SGLang native launcher handles Ray cluster setup
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port (for NCCL/Ray). Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
             model_args: CLI args for the script (e.g. --model_repo ...).
-        
+
         Returns:
             Complete SGLang launch setup with environment configuration
-        
+
         Raises:
             ValueError: If any parameter is invalid
         """
@@ -709,16 +733,22 @@ def _generate_sglang_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
 
         # Run script from its directory so relative paths resolve; pass model args
         script_dir = str(Path(model_script).parent)
         script_name = Path(model_script).name
-        run_cmd = f"cd /workspace/{script_dir} && bash {script_name} {model_args}".strip()
+        run_cmd = (
+            f"cd /workspace/{script_dir} && bash {script_name} {model_args}".strip()
+        )
 
         # For single-node, simple TP setup
         if nnodes == 1:
@@ -781,30 +811,30 @@ def _generate_megatron_command(
     ) -> str:
         """
         Generate Megatron-LM launcher command for K8s Indexed Jobs.
-        
+
         Megatron-LM is a training framework for large transformers with tensor and pipeline parallelism.
         It uses torchrun as the underlying launcher but with Megatron-specific environment variables.
-        
+
         Architecture:
         - Single-node: Tensor Parallelism (TP) across GPUs
         - Multi-node: Tensor + Pipeline Parallelism
           * TP across GPUs within each node
           * PP across nodes
-        
+
         For K8s:
         - Uses headless service for node discovery (like torchrun/deepspeed)
         - Each pod knows its rank via JOB_COMPLETION_INDEX
         - Sets TENSOR_MODEL_PARALLEL_SIZE and PIPELINE_MODEL_PARALLEL_SIZE (Megatron-Core standard)
-        
+
         Args:
             nnodes: Number of nodes (pods). Must be >= 1.
             nproc_per_node: GPUs per node. Must be >= 1.
             master_port: Master communication port (for NCCL). Must be 1-65535.
             model_script: Path to model's run script. Cannot be empty.
-        
+
         Returns:
             Complete Megatron-LM launch setup with environment configuration
-        
+
         Raises:
             ValueError: If any parameter is invalid
         """
@@ -812,12 +842,16 @@ def _generate_megatron_command(
         if not isinstance(nnodes, int) or nnodes < 1:
             raise ValueError(f"nnodes must be integer >= 1, got {nnodes}")
         if not isinstance(nproc_per_node, int) or nproc_per_node < 1:
-            raise ValueError(f"nproc_per_node must be integer >= 1, got {nproc_per_node}")
+            raise ValueError(
+                f"nproc_per_node must be integer >= 1, got {nproc_per_node}"
+            )
         if not isinstance(master_port, int) or not (1 <= master_port <= 65535):
             raise ValueError(f"master_port must be 1-65535, got {master_port}")
         if not model_script or not isinstance(model_script, str):
-            raise ValueError(f"model_script must be non-empty string, got {model_script}")
-        
+            raise ValueError(
+                f"model_script must be non-empty string, got {model_script}"
+            )
+
         # For single-node, use TP only
         if nnodes == 1:
             return f"""# Megatron-LM single-node setup (Tensor Parallelism)
@@ -840,7 +874,7 @@ def _generate_megatron_command(
     --standalone \\
     --nproc_per_node={nproc_per_node} \\
     {model_script}"""
-        
+
         # Multi-node: TP + PP
         else:
             # Use headless service for node discovery (set by template)
@@ -913,7 +947,9 @@ def _generate_primus_command(
             manifest if isinstance(manifest, dict) else None,
             self.config.additional_context,
         )
-        config_path = primus_cfg.get("config_path", "examples/torchtitan/configs/MI300X/qwen3_1.7B-pretrain.yaml")
+        config_path = primus_cfg.get(
+            "config_path", "examples/torchtitan/configs/MI300X/qwen3_1.7B-pretrain.yaml"
+        )
         cli_extra = primus_cfg.get("cli_extra", "")
         config_path_quoted = config_path.replace('"', '\\"')
         lines = [
@@ -945,9 +981,7 @@ def _generate_primus_command(
                 ]
             )
         else:
-            master_dns = (
-                f"{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
-            )
+            master_dns = f"{self.job_name}-0.{self._k8s_headless_subdomain_label}.{self.namespace}.svc.cluster.local"
             lines.extend(
                 [
                     "# Multi-node: Indexed Job + headless Service (pod-0 DNS as master)",
@@ -969,4 +1003,3 @@ def _generate_primus_command(
             lines.append(f"cd {script_dir} && bash {script_name}")
 
         return "\n".join(lines)
-
diff --git a/src/madengine/deployment/presets/__init__.py b/src/madengine/deployment/presets/__init__.py
index f554fc4f..cfa6513e 100644
--- a/src/madengine/deployment/presets/__init__.py
+++ b/src/madengine/deployment/presets/__init__.py
@@ -3,4 +3,3 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-
diff --git a/src/madengine/deployment/presets/k8s/__init__.py b/src/madengine/deployment/presets/k8s/__init__.py
index 25a33dfa..3e0eb94d 100644
--- a/src/madengine/deployment/presets/k8s/__init__.py
+++ b/src/madengine/deployment/presets/k8s/__init__.py
@@ -3,4 +3,3 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-
diff --git a/src/madengine/deployment/presets/k8s/defaults.json b/src/madengine/deployment/presets/k8s/defaults.json
index 36fc9f3e..d42b60b5 100644
--- a/src/madengine/deployment/presets/k8s/defaults.json
+++ b/src/madengine/deployment/presets/k8s/defaults.json
@@ -3,7 +3,7 @@
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
   "debug": false,
-  
+
   "k8s": {
     "kubeconfig": "~/.kube/config",
     "namespace": "default",
@@ -21,9 +21,8 @@
       "runtime_secret_name": null
     }
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8"
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/gpu-vendors/amd-multi-gpu.json b/src/madengine/deployment/presets/k8s/gpu-vendors/amd-multi-gpu.json
index 6e559742..72e53cd8 100644
--- a/src/madengine/deployment/presets/k8s/gpu-vendors/amd-multi-gpu.json
+++ b/src/madengine/deployment/presets/k8s/gpu-vendors/amd-multi-gpu.json
@@ -13,4 +13,3 @@
     "RCCL_ENABLE_HIPGRAPH": "0"
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/gpu-vendors/amd.json b/src/madengine/deployment/presets/k8s/gpu-vendors/amd.json
index 42069620..095a8212 100644
--- a/src/madengine/deployment/presets/k8s/gpu-vendors/amd.json
+++ b/src/madengine/deployment/presets/k8s/gpu-vendors/amd.json
@@ -4,4 +4,3 @@
     "gpu_resource_name": "amd.com/gpu"
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/gpu-vendors/nvidia.json b/src/madengine/deployment/presets/k8s/gpu-vendors/nvidia.json
index f7831f92..618eb9e0 100644
--- a/src/madengine/deployment/presets/k8s/gpu-vendors/nvidia.json
+++ b/src/madengine/deployment/presets/k8s/gpu-vendors/nvidia.json
@@ -12,4 +12,3 @@
     "OMP_NUM_THREADS": "12"
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/profiles/multi-gpu.json b/src/madengine/deployment/presets/k8s/profiles/multi-gpu.json
index f92df7f6..d3b293b1 100644
--- a/src/madengine/deployment/presets/k8s/profiles/multi-gpu.json
+++ b/src/madengine/deployment/presets/k8s/profiles/multi-gpu.json
@@ -13,4 +13,3 @@
     "master_port": 29500
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/profiles/multi-node.json b/src/madengine/deployment/presets/k8s/profiles/multi-node.json
index 3d814f38..6ccd87c0 100644
--- a/src/madengine/deployment/presets/k8s/profiles/multi-node.json
+++ b/src/madengine/deployment/presets/k8s/profiles/multi-node.json
@@ -19,4 +19,3 @@
     "NCCL_TIMEOUT": "600"
   }
 }
-
diff --git a/src/madengine/deployment/presets/k8s/profiles/single-gpu.json b/src/madengine/deployment/presets/k8s/profiles/single-gpu.json
index 34106655..9f04998c 100644
--- a/src/madengine/deployment/presets/k8s/profiles/single-gpu.json
+++ b/src/madengine/deployment/presets/k8s/profiles/single-gpu.json
@@ -8,4 +8,3 @@
     "cpu_limit": "16"
   }
 }
-
diff --git a/src/madengine/deployment/presets/slurm/__init__.py b/src/madengine/deployment/presets/slurm/__init__.py
index 9d11608c..00a1043d 100644
--- a/src/madengine/deployment/presets/slurm/__init__.py
+++ b/src/madengine/deployment/presets/slurm/__init__.py
@@ -12,4 +12,3 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-
diff --git a/src/madengine/deployment/presets/slurm/defaults.json b/src/madengine/deployment/presets/slurm/defaults.json
index aa98b06f..f73c671a 100644
--- a/src/madengine/deployment/presets/slurm/defaults.json
+++ b/src/madengine/deployment/presets/slurm/defaults.json
@@ -3,11 +3,11 @@
   "_description": "Default configuration for SLURM HPC cluster deployment",
   "_note": "Default partition is 'amd-rccl' for AMD RCCL cluster. Override if your cluster uses different partition names.",
   "_best_practice": "Use shared storage workspace for multi-node. Single-node auto-detects NFS and uses shared storage when available.",
-  
+
   "gpu_vendor": "AMD",
   "guest_os": "UBUNTU",
   "debug": false,
-  
+
   "slurm": {
     "partition": "amd-rccl",
     "nodes": 1,
@@ -17,16 +17,15 @@
     "exclusive": true,
     "modules": []
   },
-  
+
   "distributed": {
     "backend": "nccl",
     "port": 29500
   },
-  
+
   "env_vars": {
     "OMP_NUM_THREADS": "8",
     "MIOPEN_FIND_MODE": "1",
     "MIOPEN_USER_DB_PATH": "/tmp/.miopen"
   }
 }
-
diff --git a/src/madengine/deployment/presets/slurm/profiles/multi-node.json b/src/madengine/deployment/presets/slurm/profiles/multi-node.json
index 2e499307..700bd3bb 100644
--- a/src/madengine/deployment/presets/slurm/profiles/multi-node.json
+++ b/src/madengine/deployment/presets/slurm/profiles/multi-node.json
@@ -1,18 +1,18 @@
 {
   "_comment": "Multi-node SLURM profile - optimized for distributed workloads across nodes",
   "_description": "Configuration for multi-node distributed execution (training/inference) on SLURM cluster",
-  
+
   "slurm": {
     "nodes": 2,
     "gpus_per_node": 8,
     "time": "24:00:00"
   },
-  
+
   "distributed": {
     "backend": "nccl",
     "port": 29500
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN",
     "NCCL_DEBUG_SUBSYS": "INIT,NET",
@@ -27,4 +27,3 @@
     "RCCL_ENABLE_HIPGRAPH": "0"
   }
 }
-
diff --git a/src/madengine/deployment/presets/slurm/profiles/single-node.json b/src/madengine/deployment/presets/slurm/profiles/single-node.json
index 7c62ef7a..439c8ebe 100644
--- a/src/madengine/deployment/presets/slurm/profiles/single-node.json
+++ b/src/madengine/deployment/presets/slurm/profiles/single-node.json
@@ -1,15 +1,14 @@
 {
   "_comment": "Single-node SLURM profile - optimized for single node multi-GPU",
   "_description": "Configuration for running on a single SLURM node with multiple GPUs",
-  
+
   "slurm": {
     "nodes": 1,
     "gpus_per_node": 8,
     "time": "12:00:00"
   },
-  
+
   "env_vars": {
     "NCCL_DEBUG": "WARN"
   }
 }
-
diff --git a/src/madengine/deployment/slurm.py b/src/madengine/deployment/slurm.py
index b5eefbd4..6a4b4d71 100644
--- a/src/madengine/deployment/slurm.py
+++ b/src/madengine/deployment/slurm.py
@@ -17,7 +17,13 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
-from .base import BaseDeployment, DeploymentConfig, DeploymentResult, DeploymentStatus, create_jinja_env
+from .base import (
+    BaseDeployment,
+    DeploymentConfig,
+    DeploymentResult,
+    DeploymentStatus,
+    create_jinja_env,
+)
 from .primus_backend import infer_primus_backend_from_model_name, merged_primus_config
 from .common import configure_multi_node_profiling, normalize_launcher
 from .config_loader import ConfigLoader, apply_deployment_config
@@ -82,9 +88,7 @@ def validate(self) -> bool:
         """Validate SLURM commands are available locally."""
         # Check required SLURM CLI tools
         for tool in self.REQUIRED_TOOLS:
-            result = subprocess.run(
-                ["which", tool], capture_output=True, timeout=5
-            )
+            result = subprocess.run(["which", tool], capture_output=True, timeout=5)
             if result.returncode != 0:
                 self.console.print(
                     f"[red]✗ Required tool not found: {tool}[/red]\n"
@@ -104,7 +108,9 @@ def validate(self) -> bool:
             return False
 
         if self.gpus_per_node < 1:
-            self.console.print(f"[red]✗ Invalid GPUs per node: {self.gpus_per_node}[/red]")
+            self.console.print(
+                f"[red]✗ Invalid GPUs per node: {self.gpus_per_node}[/red]"
+            )
             return False
 
         self.console.print("[green]✓ SLURM environment validated[/green]")
@@ -113,10 +119,10 @@ def validate(self) -> bool:
     def _validate_cli_availability(self) -> bool:
         """
         Validate madengine is available before job submission.
-        
+
         Compute nodes inherit the submission environment, so madengine
         must be available in PATH on the submission node.
-        
+
         Returns:
             bool: True if madengine is available and functional
         """
@@ -126,38 +132,31 @@ def _validate_cli_availability(self) -> bool:
                 capture_output=True,
                 text=True,
                 timeout=5,
-                check=False
+                check=False,
             )
             if result.returncode == 0:
                 version = result.stdout.strip() or "unknown"
                 self.console.print(
                     f"[green]✓[/green] madengine available: [cyan]{version}[/cyan]"
                 )
-                
+
                 # Show path for transparency
                 which_result = subprocess.run(
-                    ["which", "madengine"],
-                    capture_output=True,
-                    text=True,
-                    check=False
+                    ["which", "madengine"], capture_output=True, text=True, check=False
                 )
                 if which_result.returncode == 0:
                     cli_path = which_result.stdout.strip()
                     self.console.print(f"  Path: [dim]{cli_path}[/dim]")
-                
+
                 return True
             else:
-                self.console.print(
-                    "[red]✗ madengine found but returned error[/red]"
-                )
+                self.console.print("[red]✗ madengine found but returned error[/red]")
                 if result.stderr:
                     self.console.print(f"  Error: {result.stderr.strip()}")
                 return False
-                
+
         except FileNotFoundError:
-            self.console.print(
-                "\n[red]✗ ERROR: madengine not found[/red]\n"
-            )
+            self.console.print("\n[red]✗ ERROR: madengine not found[/red]\n")
             self.console.print(
                 "[yellow]Compute nodes need madengine in PATH.[/yellow]\n"
                 "\n[bold]To fix:[/bold]\n"
@@ -184,7 +183,7 @@ def prepare(self) -> bool:
                 "\n[yellow]⚠ Tip: Compute nodes inherit your submission environment[/yellow]"
             )
             return False
-        
+
         try:
             self.output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -230,17 +229,21 @@ def _prepare_template_context(self, model_info: Dict) -> Dict[str, Any]:
         additional_context = self.config.additional_context.copy()
         additional_context["slurm"] = self.slurm_config
         resolved_gpus_per_node = resolve_runtime_gpus(model_info, additional_context)
-        
+
         # Extract launcher configuration
-        launcher_type = self.distributed_config.get("launcher", "torchrun")  # Default to torchrun
-        
+        launcher_type = self.distributed_config.get(
+            "launcher", "torchrun"
+        )  # Default to torchrun
+
         # Normalize launcher based on deployment type and validity
         launcher_type = normalize_launcher(launcher_type, "slurm")
-        
+
         nnodes = self.distributed_config.get("nnodes", self.nodes)
-        nproc_per_node = self.distributed_config.get("nproc_per_node", resolved_gpus_per_node)
+        nproc_per_node = self.distributed_config.get(
+            "nproc_per_node", resolved_gpus_per_node
+        )
         master_port = self.distributed_config.get("port", 29500)
-        
+
         # Apply multi-node profiling logic if tools are configured
         tools = additional_context.get("tools", [])
         if nnodes > 1 and tools:
@@ -249,28 +252,29 @@ def _prepare_template_context(self, model_info: Dict) -> Dict[str, Any]:
             class ConsoleLogger:
                 def __init__(self, console):
                     self.console = console
+
                 def info(self, msg):
                     self.console.print(f"[cyan]{msg}[/cyan]")
+
                 def warning(self, msg):
                     self.console.print(f"[yellow]{msg}[/yellow]")
+
                 def debug(self, msg):
                     pass  # Skip debug messages in console
-            
+
             profiling_config = configure_multi_node_profiling(
-                nnodes=nnodes,
-                tools_config=tools,
-                logger=ConsoleLogger(self.console)
+                nnodes=nnodes, tools_config=tools, logger=ConsoleLogger(self.console)
             )
-            
+
             if profiling_config["enabled"]:
                 tools = profiling_config["tools"]
             else:
                 # rocprofv3 not available - skip profiling for multi-node
                 tools = []
-            
+
             # Update tools in additional_context
             additional_context["tools"] = tools
-        
+
         # Generate launcher-specific command
         launcher_command = self._generate_launcher_command(
             launcher_type=launcher_type,
@@ -279,7 +283,7 @@ def debug(self, msg):
             master_port=master_port,
             model_name=model_info.get("name", "") or "",
         )
-        
+
         return {
             "model_name": model_info["name"],
             "manifest_file": os.path.abspath(self.config.manifest_file),
@@ -306,9 +310,9 @@ def debug(self, msg):
             "live_output": self.config.additional_context.get("live_output", False),
             "tags": " ".join(model_info.get("tags", [])),
             "multiple_results": model_info.get("multiple_results"),
-            "credential_file": "credential.json"
-            if Path("credential.json").exists()
-            else None,
+            "credential_file": (
+                "credential.json" if Path("credential.json").exists() else None
+            ),
             "data_file": "data.json" if Path("data.json").exists() else None,
             # Launcher configuration
             "launcher_type": launcher_type,
@@ -329,15 +333,15 @@ def _generate_launcher_command(
     ) -> str:
         """
         Generate launcher-specific command based on launcher type.
-        
+
         Follows k8s pattern: different launchers have different command generation.
-        
+
         Args:
             launcher_type: Type of launcher (torchrun, vllm, sglang, deepspeed, etc.)
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master communication port
-            
+
         Returns:
             Launcher-specific environment setup and command string
         """
@@ -348,13 +352,17 @@ def _generate_launcher_command(
         elif launcher_type == "sglang":
             return self._generate_sglang_command(nnodes, nproc_per_node, master_port)
         elif launcher_type == "sglang-disagg" or launcher_type == "sglang_disagg":
-            return self._generate_sglang_disagg_command(nnodes, nproc_per_node, master_port)
+            return self._generate_sglang_disagg_command(
+                nnodes, nproc_per_node, master_port
+            )
         elif launcher_type == "deepspeed":
             return self._generate_deepspeed_command(nnodes, nproc_per_node, master_port)
         elif launcher_type == "megatron":
             return self._generate_megatron_command(nnodes, nproc_per_node, master_port)
         elif launcher_type == "torchtitan":
-            return self._generate_torchtitan_command(nnodes, nproc_per_node, master_port)
+            return self._generate_torchtitan_command(
+                nnodes, nproc_per_node, master_port
+            )
         elif launcher_type == "primus":
             return self._generate_primus_command(
                 nnodes, nproc_per_node, master_port, model_name=model_name
@@ -373,15 +381,15 @@ def _generate_torchrun_command(
     ) -> str:
         """
         Generate torchrun launcher command for SLURM.
-        
+
         For single-node (nnodes=1): Uses standalone mode
         For multi-node (nnodes>1): Uses distributed mode with SLURM environment
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             MAD_MULTI_NODE_RUNNER environment variable setup
         """
@@ -397,81 +405,81 @@ def _generate_vllm_command(
     ) -> str:
         """
         Generate vLLM launcher environment variables.
-        
+
         vLLM manages its own process spawning - no torchrun needed.
         Model script directly invokes vLLM with tensor/pipeline parallelism.
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             Environment variable setup for vLLM
         """
         if nnodes == 1:
-            return f'''# vLLM single-node setup (Tensor Parallelism)
+            return f"""# vLLM single-node setup (Tensor Parallelism)
 export VLLM_TENSOR_PARALLEL_SIZE={nproc_per_node}
 export VLLM_PIPELINE_PARALLEL_SIZE=1
 export VLLM_DISTRIBUTED_BACKEND="auto"
-# vLLM handles its own process management - no MAD_MULTI_NODE_RUNNER needed'''
+# vLLM handles its own process management - no MAD_MULTI_NODE_RUNNER needed"""
         else:
             # One vLLM serve per node (TP only on that node), no shared Ray = data parallelism
-            return f'''# vLLM multi-node setup (data parallel: one serve per node, TP only)
+            return f"""# vLLM multi-node setup (data parallel: one serve per node, TP only)
 export VLLM_TENSOR_PARALLEL_SIZE={nproc_per_node}
 export VLLM_PIPELINE_PARALLEL_SIZE=1
 export VLLM_DISTRIBUTED_BACKEND="none"
-# vLLM handles its own process management - no MAD_MULTI_NODE_RUNNER needed'''
+# vLLM handles its own process management - no MAD_MULTI_NODE_RUNNER needed"""
 
     def _generate_sglang_command(
         self, nnodes: int, nproc_per_node: int, master_port: int
     ) -> str:
         """
         Generate SGLang launcher environment variables.
-        
+
         SGLang similar to vLLM - manages its own process spawning.
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             Environment variable setup for SGLang
         """
         if nnodes == 1:
-            return f'''# SGLang single-node setup (Tensor Parallelism)
+            return f"""# SGLang single-node setup (Tensor Parallelism)
 export SGLANG_TENSOR_PARALLEL_SIZE={nproc_per_node}
 export SGLANG_PIPELINE_PARALLEL_SIZE=1
-# SGLang handles its own process management - no MAD_MULTI_NODE_RUNNER needed'''
+# SGLang handles its own process management - no MAD_MULTI_NODE_RUNNER needed"""
         else:
             # One SGLang serve per node (TP only on that node), no cross-node coordination = data parallel
-            return f'''# SGLang multi-node setup (data parallel: one serve per node, TP only)
+            return f"""# SGLang multi-node setup (data parallel: one serve per node, TP only)
 export SGLANG_TENSOR_PARALLEL_SIZE={nproc_per_node}
 export SGLANG_PIPELINE_PARALLEL_SIZE=1
-# SGLang handles its own process management - no MAD_MULTI_NODE_RUNNER needed'''
+# SGLang handles its own process management - no MAD_MULTI_NODE_RUNNER needed"""
 
     def _generate_sglang_disagg_command(
         self, nnodes: int, nproc_per_node: int, master_port: int
     ) -> str:
         """
         Generate SGLang Disaggregated launcher environment for SLURM.
-        
+
         SGLang Disaggregated Architecture:
         - Node 0: Proxy (load balancer)
         - Nodes 1 to xP: Prefill nodes
         - Nodes xP+1 to xP+yD: Decode nodes
-        
+
         Minimum cluster: 3 nodes (1 proxy + 1 prefill + 1 decode)
-        
+
         Args:
             nnodes: Total number of nodes (must be >= 3)
             nproc_per_node: GPUs per node (tensor parallel size)
             master_port: Master port for coordination
-            
+
         Returns:
             Environment setup with node role assignment
-            
+
         Raises:
             ValueError: If nnodes < 3 (minimum for disagg)
         """
@@ -480,12 +488,14 @@ def _generate_sglang_disagg_command(
                 f"SGLang Disaggregated requires minimum 3 nodes "
                 f"(1 proxy + 1 prefill + 1 decode), got {nnodes}"
             )
-        
+
         # Check if custom split is specified in additional_context
-        sglang_disagg_config = self.config.additional_context.get("distributed", {}).get("sglang_disagg", {})
+        sglang_disagg_config = self.config.additional_context.get(
+            "distributed", {}
+        ).get("sglang_disagg", {})
         prefill_nodes = sglang_disagg_config.get("prefill_nodes")
         decode_nodes = sglang_disagg_config.get("decode_nodes")
-        
+
         if prefill_nodes is not None and decode_nodes is not None:
             # User specified custom split - validate
             if prefill_nodes < 1 or decode_nodes < 1:
@@ -506,8 +516,8 @@ def _generate_sglang_disagg_command(
             # For N total nodes: 1 proxy + ~40% prefill + ~60% decode
             xP = max(1, (nnodes - 1) * 2 // 5)  # ~40% of worker nodes
             yD = nnodes - 1 - xP  # remaining nodes
-        
-        return f'''# SGLang Disaggregated multi-node setup
+
+        return f"""# SGLang Disaggregated multi-node setup
 # ============================================
 # Cluster Configuration:
 #   Total Nodes: {nnodes}
@@ -545,21 +555,21 @@ def _generate_sglang_disagg_command(
 echo "=========================================="
 
 # No MAD_MULTI_NODE_RUNNER - SGLang disagg handles process management
-# Model script should detect SGLANG_DISAGG_MODE and launch appropriately'''
+# Model script should detect SGLANG_DISAGG_MODE and launch appropriately"""
 
     def _generate_deepspeed_command(
         self, nnodes: int, nproc_per_node: int, master_port: int
     ) -> str:
         """
         Generate DeepSpeed launcher command.
-        
+
         DeepSpeed has its own launcher similar to torchrun.
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             MAD_MULTI_NODE_RUNNER with deepspeed launcher
         """
@@ -579,14 +589,14 @@ def _generate_megatron_command(
     ) -> str:
         """
         Generate Megatron-LM launcher command.
-        
+
         Megatron-LM typically uses torchrun but with specific environment variables.
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             MAD_MULTI_NODE_RUNNER with megatron-specific setup
         """
@@ -609,24 +619,24 @@ def _generate_torchtitan_command(
     ) -> str:
         """
         Generate TorchTitan launcher command for SLURM.
-        
+
         TorchTitan is a PyTorch native platform for LLM pre-training that uses
         torchrun as its underlying launcher but requires additional configuration
         for multi-dimensional parallelism (FSDP2, Tensor Parallel, Pipeline Parallel).
-        
+
         Key TorchTitan features:
         - Uses TOML configuration files for training setup
         - Supports FSDP2, Tensor Parallel, Pipeline Parallel, Context Parallel
         - Built on top of torchrun for distributed coordination
-        
+
         For single-node (nnodes=1): Uses standalone torchrun mode
         For multi-node (nnodes>1): Uses distributed torchrun with SLURM environment
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             MAD_MULTI_NODE_RUNNER with torchtitan-specific setup
         """
@@ -664,15 +674,21 @@ def _generate_primus_command(
         We only export PRIMUS_CONFIG_PATH and optional PRIMUS_CLI_EXTRA. No MAD_MULTI_NODE_RUNNER.
         """
         primus_cfg = merged_primus_config(
-            self.manifest if isinstance(getattr(self, "manifest", None), dict) else None,
+            (
+                self.manifest
+                if isinstance(getattr(self, "manifest", None), dict)
+                else None
+            ),
             self.config.additional_context,
         )
         config_path = primus_cfg.get("config_path", "exp_pretrain.yaml")
         cli_extra = primus_cfg.get("cli_extra", "")
         # Safe shell quoting for config_path and cli_extra
         config_path_quoted = config_path.replace('"', '\\"')
-        lines = [f'# Primus launcher (model script runs run_pretrain.sh)',
-                 f'export PRIMUS_CONFIG_PATH="{config_path_quoted}"']
+        lines = [
+            f"# Primus launcher (model script runs run_pretrain.sh)",
+            f'export PRIMUS_CONFIG_PATH="{config_path_quoted}"',
+        ]
         if (cli_extra or "").strip():
             cli_extra_quoted = cli_extra.replace('"', '\\"')
             lines.append(f'export PRIMUS_CLI_EXTRA="{cli_extra_quoted}"')
@@ -690,23 +706,23 @@ def _generate_basic_env_command(
     ) -> str:
         """
         Generate basic environment variables for unknown launchers.
-        
+
         Provides standard distributed execution environment variables
         and lets the model script handle launcher invocation.
-        
+
         Args:
             nnodes: Number of nodes
             nproc_per_node: GPUs per node
             master_port: Master port
-            
+
         Returns:
             Basic environment variable setup
         """
-        return f'''# Basic distributed environment (custom launcher)
+        return f"""# Basic distributed environment (custom launcher)
 export NNODES={nnodes}
 export NPROC_PER_NODE={nproc_per_node}
 export MASTER_PORT={master_port}
-# Model script should handle launcher invocation'''
+# Model script should handle launcher invocation"""
 
     def deploy(self) -> DeploymentResult:
         """Submit sbatch script to SLURM scheduler (locally)."""
@@ -724,11 +740,17 @@ def deploy(self) -> DeploymentResult:
         # Health-check srun invocations create SLURM jobs; we cancel them after preflight.
         enable_preflight = self.slurm_config.get("enable_node_check", True)
         auto_cleanup = self.slurm_config.get("auto_cleanup_nodes", False)
-        allow_submit_without_clean = self.slurm_config.get("allow_submit_without_clean_nodes", False)
+        allow_submit_without_clean = self.slurm_config.get(
+            "allow_submit_without_clean_nodes", False
+        )
         clean_nodes: List[str] = []
         health_check_job_name: Optional[str] = None
 
-        if enable_preflight and self.nodes >= 1 and not self.slurm_config.get("nodelist"):
+        if (
+            enable_preflight
+            and self.nodes >= 1
+            and not self.slurm_config.get("nodelist")
+        ):
             try:
                 selector = SlurmNodeSelector(
                     console=self.console,
@@ -741,10 +763,14 @@ def deploy(self) -> DeploymentResult:
                     exclude=self.slurm_config.get("exclude"),
                     constraint=self.slurm_config.get("constraint"),
                 )
-                health_check_job_name = getattr(selector, "_health_check_job_name", None)
+                health_check_job_name = getattr(
+                    selector, "_health_check_job_name", None
+                )
 
                 # Update exclude list if we found dirty/unreachable/unknown nodes
-                if updated_exclude and updated_exclude != self.slurm_config.get("exclude", ""):
+                if updated_exclude and updated_exclude != self.slurm_config.get(
+                    "exclude", ""
+                ):
                     self.console.print(
                         f"[dim]Updated exclude list for sbatch: {updated_exclude}[/dim]\n"
                     )
@@ -757,7 +783,9 @@ def deploy(self) -> DeploymentResult:
                     and not allow_submit_without_clean
                     and len(clean_nodes) < self.nodes
                 ):
-                    SlurmNodeSelector.cancel_health_check_jobs(health_check_job_name, self.console)
+                    SlurmNodeSelector.cancel_health_check_jobs(
+                        health_check_job_name, self.console
+                    )
                     return DeploymentResult(
                         status=DeploymentStatus.FAILED,
                         deployment_id="",
@@ -774,13 +802,13 @@ def deploy(self) -> DeploymentResult:
                     self.console.print(f"[dim]Using nodelist: {nodelist_str}[/dim]\n")
                     self.prepare()
             except Exception as e:
-                self.console.print(
-                    f"[yellow]⚠ Node health check failed: {e}[/yellow]"
-                )
+                self.console.print(f"[yellow]⚠ Node health check failed: {e}[/yellow]")
                 self.console.print("[dim]Continuing with job submission[/dim]\n")
             finally:
                 # Always cancel health-check jobs so they do not stay in the queue
-                SlurmNodeSelector.cancel_health_check_jobs(health_check_job_name, self.console)
+                SlurmNodeSelector.cancel_health_check_jobs(
+                    health_check_job_name, self.console
+                )
         # ==================== END PREFLIGHT ====================
 
         try:
@@ -842,7 +870,7 @@ def monitor(self, deployment_id: str) -> DeploymentResult:
                 return self._check_job_completion(deployment_id)
 
             status = result.stdout.strip().upper()
-            
+
             # Check if live output is enabled
             live_output = self.config.additional_context.get("live_output", False)
 
@@ -881,8 +909,11 @@ def monitor(self, deployment_id: str) -> DeploymentResult:
                 )
 
         except Exception as e:
-            self.console.print(f"[red]Monitor exception for job {deployment_id}: {e}[/red]")
+            self.console.print(
+                f"[red]Monitor exception for job {deployment_id}: {e}[/red]"
+            )
             import traceback
+
             self.console.print(f"[dim red]{traceback.format_exc()}[/dim red]")
             return DeploymentResult(
                 status=DeploymentStatus.FAILED,
@@ -893,80 +924,88 @@ def monitor(self, deployment_id: str) -> DeploymentResult:
     def _stream_job_output(self, job_id: str, final: bool = False):
         """Stream output from SLURM job output file."""
         # Track last position read from output file
-        if not hasattr(self, '_output_positions'):
+        if not hasattr(self, "_output_positions"):
             self._output_positions = {}
-        
+
         # Find output file
         output_dir = str(self.output_dir)
         output_pattern = f"{output_dir}/madengine-*_{job_id}_*.out"
-        
+
         try:
             import glob
+
             output_files = glob.glob(output_pattern)
-            
+
             if not output_files:
                 return  # Output file not created yet
-            
+
             output_file = output_files[0]  # Use first match
-            
+
             # Read new content from file
             try:
-                with open(output_file, 'r') as f:
+                with open(output_file, "r") as f:
                     # Seek to last position
                     last_pos = self._output_positions.get(job_id, 0)
                     f.seek(last_pos)
-                    
+
                     # Read new lines
                     new_content = f.read()
-                    
+
                     if new_content:
                         # Print new output with prefix
                         for line in new_content.splitlines():
                             if line.strip():  # Skip empty lines
                                 self.console.print(f"[dim cyan]│[/dim cyan] {line}")
-                    
+
                     # Update position
                     self._output_positions[job_id] = f.tell()
-                    
+
             except FileNotFoundError:
                 pass  # File not ready yet
-                
+
         except Exception as e:
             # Silently ignore streaming errors to not disrupt monitoring
             if final:
-                self.console.print(f"[dim yellow]Note: Could not stream output: {e}[/dim yellow]")
+                self.console.print(
+                    f"[dim yellow]Note: Could not stream output: {e}[/dim yellow]"
+                )
 
     def _show_log_summary(self, job_id: str, success: bool = True):
         """Show a summary with pointers to log files instead of streaming verbose output."""
         output_dir = str(self.output_dir)
-        
+
         try:
             import glob
+
             # Find output and error files for this job
             output_files = glob.glob(f"{output_dir}/madengine-*_{job_id}_*.out")
             error_files = glob.glob(f"{output_dir}/madengine-*_{job_id}_*.err")
-            
+
             if output_files or error_files:
                 status_symbol = "✓" if success else "✗"
                 status_color = "green" if success else "red"
-                
-                self.console.print(f"[{status_color}]{status_symbol}[/{status_color}] SLURM job {job_id} logs saved to:")
-                
+
+                self.console.print(
+                    f"[{status_color}]{status_symbol}[/{status_color}] SLURM job {job_id} logs saved to:"
+                )
+
                 for out_file in output_files:
                     self.console.print(f"  [cyan]→[/cyan] Output: {out_file}")
-                    
+
                 for err_file in error_files:
                     # Check if error file has content
                     if os.path.exists(err_file) and os.path.getsize(err_file) > 0:
                         self.console.print(f"  [yellow]→[/yellow] Errors: {err_file}")
-                
+
                 if not success and error_files:
                     # Show last few lines of error file for failed jobs
                     for err_file in error_files:
                         if os.path.exists(err_file) and os.path.getsize(err_file) > 0:
-                            self.console.print(f"\n[yellow]Last 10 lines of error log:[/yellow]")
+                            self.console.print(
+                                f"\n[yellow]Last 10 lines of error log:[/yellow]"
+                            )
                             try:
-                                with open(err_file, 'r') as f:
+                                with open(err_file, "r") as f:
                                     lines = f.readlines()
                                     for line in lines[-10:]:
                                         if line.strip():
@@ -975,10 +1014,14 @@ def _show_log_summary(self, job_id: str, success: bool = True):
                                 pass
                             break  # Only show first error file
             else:
-                self.console.print(f"[dim yellow]Note: Log files for job {job_id} not found in {output_dir}[/dim yellow]")
-                
+                self.console.print(
+                    f"[dim yellow]Note: Log files for job {job_id} not found in {output_dir}[/dim yellow]"
+                )
+
         except Exception as e:
-            self.console.print(f"[dim yellow]Note: Could not locate log files: {e}[/dim yellow]")
+            self.console.print(
+                f"[dim yellow]Note: Could not locate log files: {e}[/dim yellow]"
+            )
 
     def _check_job_completion(self, job_id: str) -> DeploymentResult:
         """Check completed job status using sacct (locally).
@@ -1012,11 +1055,13 @@ def _check_job_completion(self, job_id: str) -> DeploymentResult:
 
             if result.returncode == 0:
                 status = result.stdout.strip().upper()
-                self.console.print(f"[dim]SLURM job {job_id} final status: {status}[/dim]")
-                
+                self.console.print(
+                    f"[dim]SLURM job {job_id} final status: {status}[/dim]"
+                )
+
                 # Check if live output is enabled
                 live_output = self.config.additional_context.get("live_output", False)
-                
+
                 if "COMPLETED" in status:
                     # Show final output or summary based on live_output flag
                     if live_output:
@@ -1082,9 +1127,13 @@ def _build_perf_entry_from_aggregated(
 
         run_details = {
             "model": model_info.get("name", aggregated_record.get("model", "")),
-            "n_gpus": str(aggregated_record.get("n_gpus", self.nodes * self.gpus_per_node)),
+            "n_gpus": str(
+                aggregated_record.get("n_gpus", self.nodes * self.gpus_per_node)
+            ),
             "nnodes": str(aggregated_record.get("nnodes", self.nodes)),
-            "gpus_per_node": str(aggregated_record.get("gpus_per_node", self.gpus_per_node)),
+            "gpus_per_node": str(
+                aggregated_record.get("gpus_per_node", self.gpus_per_node)
+            ),
             "training_precision": model_info.get("training_precision", ""),
             "pipeline": get_pipeline(),
             "args": model_info.get("args", ""),
@@ -1109,7 +1158,9 @@ def _build_perf_entry_from_aggregated(
             "data_size": "",
             "data_download_duration": "",
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags(run_details)
 
@@ -1164,7 +1215,9 @@ def _build_common_info_dict(
             "data_size": "",
             "data_download_duration": "",
             "build_number": get_build_number(),
-            "additional_docker_run_options": model_info.get("additional_docker_run_options", ""),
+            "additional_docker_run_options": model_info.get(
+                "additional_docker_run_options", ""
+            ),
         }
         flatten_tags(result)
         return result
@@ -1202,7 +1255,9 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
         built_models_dict = self.manifest.get("built_models") or {}
         model_info_for_path = built_models_dict.get(model_key, {}) if model_key else {}
         model_name_for_path = model_info_for_path.get("name", model_key or "unknown")
-        model_name = model_key or "unknown"  # image key for build_info / model_info_for_entry lookups
+        model_name = (
+            model_key or "unknown"
+        )  # image key for build_info / model_info_for_entry lookups
 
         build_info = {}
         built_images = self.manifest.get("built_images") or {}
@@ -1218,7 +1273,9 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
 
         # Gather log content per node: from job_dir/node_N/ (new) or flat output_dir .out files
         per_node_log_contents: List[tuple] = []
-        flat_out_files = sorted(self.output_dir.glob(f"madengine-*_{deployment_id}_*.out"))
+        flat_out_files = sorted(
+            self.output_dir.glob(f"madengine-*_{deployment_id}_*.out")
+        )
         # Multi-node: only use explicit node logs (_node_N.out) to avoid also picking up
         # SBATCH %t output (madengine-*_<jobid>_0.out, _1.out), which would duplicate metrics.
         if self.nodes > 1:
@@ -1249,7 +1306,9 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
 
         # Multi-node: keep only log entries for actual node indices [0, nodes-1]
         if self.nodes > 1:
-            per_node_log_contents = [(n, c) for n, c in per_node_log_contents if n < self.nodes]
+            per_node_log_contents = [
+                (n, c) for n, c in per_node_log_contents if n < self.nodes
+            ]
 
         # Copy flat logs into job_dir/node_<task>/ for consistency if not already there.
         # Only create dirs for indices in [0, nodes-1] so we never create extra node_2, etc.
@@ -1293,9 +1352,11 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                 )
 
         run_details_dict: Optional[Dict[str, Any]] = None
-        model_info_for_entry = (self.manifest.get("built_models") or {}).get(
-            model_key, {}
-        ) if model_key else {}
+        model_info_for_entry = (
+            (self.manifest.get("built_models") or {}).get(model_key, {})
+            if model_key
+            else {}
+        )
 
         # Multiple results path: resolve CSV from job_dir/node_*, then cwd/run_directory
         mult_res = model_info_for_entry.get("multiple_results")
@@ -1346,22 +1407,29 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                 )
                 results["perf_files"] = [str(Path("perf.csv").resolve())]
                 import csv as _csv
+
                 try:
-                    with open(resolved_csv, "r", encoding="utf-8", errors="ignore") as f:
+                    with open(
+                        resolved_csv, "r", encoding="utf-8", errors="ignore"
+                    ) as f:
                         reader = _csv.DictReader(f)
                         for row in reader:
                             row = {k.strip(): v for k, v in row.items() if k}
                             if row.get("performance") and row.get("metric"):
-                                results["successful_runs"].append({
-                                    "model": model_info_for_entry.get("name", "") + "_" + row.get("model", ""),
-                                    "status": "SUCCESS",
-                                    "performance": str(row.get("performance", "")),
-                                    "metric": row.get("metric", ""),
-                                    "duration": row.get("test_duration", ""),
-                                    "gpu_arch": gpu_arch,
-                                    "deployment": "slurm",
-                                    "machine": deployment_id,
-                                })
+                                results["successful_runs"].append(
+                                    {
+                                        "model": model_info_for_entry.get("name", "")
+                                        + "_"
+                                        + row.get("model", ""),
+                                        "status": "SUCCESS",
+                                        "performance": str(row.get("performance", "")),
+                                        "metric": row.get("metric", ""),
+                                        "duration": row.get("test_duration", ""),
+                                        "gpu_arch": gpu_arch,
+                                        "deployment": "slurm",
+                                        "machine": deployment_id,
+                                    }
+                                )
                 except Exception:
                     pass
                 self.console.print(
@@ -1452,9 +1520,13 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
             perf_csv_path = "perf.csv"
             self._ensure_perf_csv_exists()
             if run_details_dict.get("status") == "SUCCESS":
-                update_perf_csv(perf_csv=perf_csv_path, single_result=str(perf_entry_path))
+                update_perf_csv(
+                    perf_csv=perf_csv_path, single_result=str(perf_entry_path)
+                )
             else:
-                update_perf_csv(perf_csv=perf_csv_path, exception_result=str(perf_entry_path))
+                update_perf_csv(
+                    perf_csv=perf_csv_path, exception_result=str(perf_entry_path)
+                )
             try:
                 scripts_path = model_info_for_entry.get("scripts", "")
                 scripts_base_dir = scripts_base_dir_from(scripts_path)
@@ -1476,7 +1548,9 @@ def collect_results(self, deployment_id: str) -> Dict[str, Any]:
                     num_entries=num_entries,
                 )
             except Exception as e:
-                self.console.print(f"[yellow]⚠ Could not update perf_super: {e}[/yellow]")
+                self.console.print(
+                    f"[yellow]⚠ Could not update perf_super: {e}[/yellow]"
+                )
             results["perf_files"] = [str(Path(perf_csv_path).resolve())]
             run_data = {
                 "model": run_details_dict.get("model", ""),
@@ -1558,13 +1632,10 @@ def _collect_results_parse_perf_csv(
     def cleanup(self, deployment_id: str) -> bool:
         """Cancel SLURM job if still running (locally)."""
         try:
-            subprocess.run(
-                ["scancel", deployment_id], capture_output=True, timeout=10
-            )
+            subprocess.run(["scancel", deployment_id], capture_output=True, timeout=10)
             self.console.print(f"[yellow]Cancelled SLURM job: {deployment_id}[/yellow]")
             return True
 
         except Exception as e:
             self.console.print(f"[yellow]⚠ Cleanup warning: {e}[/yellow]")
             return False
-
diff --git a/src/madengine/deployment/slurm_node_selector.py b/src/madengine/deployment/slurm_node_selector.py
index 408e8d3c..4435d5c3 100644
--- a/src/madengine/deployment/slurm_node_selector.py
+++ b/src/madengine/deployment/slurm_node_selector.py
@@ -23,6 +23,7 @@
 
 class NodeHealth(Enum):
     """Health status of a compute node."""
+
     CLEAN = "clean"  # No stale processes, ready to use
     DIRTY = "dirty"  # Has stale Ray/vLLM processes
     UNREACHABLE = "unreachable"  # Cannot connect to node
@@ -32,18 +33,19 @@ class NodeHealth(Enum):
 @dataclass
 class NodeStatus:
     """Status of a compute node's GPUs."""
+
     node: str
     health: NodeHealth
     gpu_memory_used_gb: float
     gpu_memory_total_gb: float
     process_count: int
     error_message: Optional[str] = None
-    
+
     @property
     def memory_free_gb(self) -> float:
         """Calculate free GPU memory."""
         return self.gpu_memory_total_gb - self.gpu_memory_used_gb
-    
+
     @property
     def memory_usage_percent(self) -> float:
         """Calculate memory usage percentage."""
@@ -55,17 +57,17 @@ def memory_usage_percent(self) -> float:
 class SlurmNodeSelector:
     """
     Selects clean GPU nodes for SLURM job allocation.
-    
+
     Checks candidate nodes for stale Ray/vLLM processes that would cause
     OOM errors. Can automatically clean dirty nodes or recommend exclusion.
     """
-    
+
     # Memory threshold: nodes with >50GB used are considered dirty
     MEMORY_THRESHOLD_GB = 50.0
-    
+
     # Process patterns that indicate stale processes
     STALE_PATTERNS = ["ray::", "RayWorkerWrapper", "raylet", "vllm"]
-    
+
     def __init__(
         self,
         console: Optional[Console] = None,
@@ -75,7 +77,7 @@ def __init__(
     ):
         """
         Initialize node selector.
-        
+
         Args:
             console: Rich console for output
             auto_cleanup: Automatically clean dirty nodes
@@ -86,7 +88,7 @@ def __init__(
         self.auto_cleanup = auto_cleanup
         self.verbose = verbose
         self.timeout = timeout
-    
+
     # Max candidates to check (avoids excessive checks on large clusters)
     MAX_CANDIDATES_CAP = 100
 
@@ -111,11 +113,14 @@ def get_candidate_nodes(
         """
         cmd = [
             "sinfo",
-            "-p", partition,
+            "-p",
+            partition,
             "-N",  # Node-oriented format
             "-h",  # No header
-            "-o", "%N",  # Node name only
-            "-t", "idle",  # Idle nodes only
+            "-o",
+            "%N",  # Node name only
+            "-t",
+            "idle",  # Idle nodes only
         ]
 
         if constraint:
@@ -138,14 +143,14 @@ def get_candidate_nodes(
 
             # Parse nodes
             all_nodes = set()
-            for line in result.stdout.strip().split('\n'):
+            for line in result.stdout.strip().split("\n"):
                 line = line.strip()
                 if line:
                     all_nodes.add(line)
 
             # Remove excluded nodes
             if exclude:
-                excluded = set(exclude.split(','))
+                excluded = set(exclude.split(","))
                 all_nodes -= excluded
 
             # Return all idle nodes, capped to avoid excessive checks
@@ -159,8 +164,10 @@ def get_candidate_nodes(
             if self.verbose:
                 self.console.print(f"[yellow]⚠ Query failed: {e}[/yellow]")
             return None
-    
-    def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeStatus:
+
+    def check_node_health(
+        self, node: str, job_name: Optional[str] = None
+    ) -> NodeStatus:
         """
         Check GPU health on a node using srun.
 
@@ -218,7 +225,7 @@ def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeSt
                 text=True,
                 timeout=self.timeout,
             )
-            
+
             if result.returncode != 0:
                 return NodeStatus(
                     node=node,
@@ -228,27 +235,31 @@ def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeSt
                     process_count=0,
                     error_message=f"srun failed: {result.stderr[:100]}",
                 )
-            
+
             # Parse output
             output = result.stdout
-            
+
             # Extract GPU info
-            gpu_info = self._extract_section(output, "===GPU_INFO===", "===END_GPU_INFO===")
-            processes = self._extract_section(output, "===PROCESSES===", "===END_PROCESSES===")
-            
+            gpu_info = self._extract_section(
+                output, "===GPU_INFO===", "===END_GPU_INFO==="
+            )
+            processes = self._extract_section(
+                output, "===PROCESSES===", "===END_PROCESSES==="
+            )
+
             # Parse GPU memory (simplified - in production would parse actual output)
             # For MI300X: typically 192GB per GPU
             total_memory_gb = 192.0 * 4  # Assume 4 GPUs
-            
+
             # Count processes
             process_count = 0
             if processes and "NO_PROCESSES" not in processes:
-                process_count = len([l for l in processes.split('\n') if l.strip()])
-            
+                process_count = len([l for l in processes.split("\n") if l.strip()])
+
             # Estimate memory usage
             # Rough heuristic: each process uses ~45GB (observed from Job 2437)
             used_memory_gb = process_count * 45.0
-            
+
             # Determine health
             if process_count == 0:
                 health = NodeHealth.CLEAN
@@ -256,7 +267,7 @@ def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeSt
                 health = NodeHealth.DIRTY
             else:
                 health = NodeHealth.CLEAN  # Minor processes, should be OK
-            
+
             return NodeStatus(
                 node=node,
                 health=health,
@@ -264,7 +275,7 @@ def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeSt
                 gpu_memory_total_gb=total_memory_gb,
                 process_count=process_count,
             )
-            
+
         except subprocess.TimeoutExpired:
             return NodeStatus(
                 node=node,
@@ -283,7 +294,7 @@ def check_node_health(self, node: str, job_name: Optional[str] = None) -> NodeSt
                 process_count=0,
                 error_message=str(e)[:100],
             )
-    
+
     def cleanup_node(self, node: str, job_name: Optional[str] = None) -> bool:
         """
         Clean up stale processes on a node using srun.
@@ -332,19 +343,21 @@ def cleanup_node(self, node: str, job_name: Optional[str] = None) -> bool:
                 text=True,
                 timeout=self.timeout,
             )
-            
+
             success = result.returncode == 0 and "CLEANUP_OK" in result.stdout
-            
+
             if success and self.verbose:
                 self.console.print(f"[green]    ✓ Cleaned {node}[/green]")
-            
+
             return success
-            
+
         except Exception as e:
             if self.verbose:
-                self.console.print(f"[yellow]    ⚠ Cleanup failed for {node}: {e}[/yellow]")
+                self.console.print(
+                    f"[yellow]    ⚠ Cleanup failed for {node}: {e}[/yellow]"
+                )
             return False
-    
+
     def select_nodes(
         self,
         partition: str,
@@ -376,10 +389,14 @@ def select_nodes(
         )
 
         # Unique job name for all health-check srun invocations (enables cleanup)
-        self._health_check_job_name = f"madengine_nodecheck_{os.getpid()}_{int(time.time())}"
+        self._health_check_job_name = (
+            f"madengine_nodecheck_{os.getpid()}_{int(time.time())}"
+        )
 
         # Get all idle candidate nodes
-        candidates = self.get_candidate_nodes(partition, nodes_needed, exclude, constraint)
+        candidates = self.get_candidate_nodes(
+            partition, nodes_needed, exclude, constraint
+        )
 
         if not candidates:
             self.console.print(
@@ -389,7 +406,9 @@ def select_nodes(
             return [], exclude or ""
 
         if self.verbose:
-            self.console.print(f"[dim]Idle candidates: {len(candidates)} (checking on-demand until {nodes_needed} clean)[/dim]\n")
+            self.console.print(
+                f"[dim]Idle candidates: {len(candidates)} (checking on-demand until {nodes_needed} clean)[/dim]\n"
+            )
 
         # On-demand check: stop as soon as we have enough clean nodes
         statuses: List[NodeStatus] = []
@@ -434,22 +453,30 @@ def select_nodes(
                 self.console.print("[yellow]Running automatic cleanup...[/yellow]\n")
                 for status in dirty_nodes:
                     self.console.print(f"  Cleaning {status.node}...")
-                    if self.cleanup_node(status.node, job_name=self._health_check_job_name):
+                    if self.cleanup_node(
+                        status.node, job_name=self._health_check_job_name
+                    ):
                         time.sleep(2)
-                        new_status = self.check_node_health(status.node, job_name=self._health_check_job_name)
+                        new_status = self.check_node_health(
+                            status.node, job_name=self._health_check_job_name
+                        )
                         if new_status.health == NodeHealth.CLEAN:
                             clean_nodes.append(new_status.node)
                             nodes_to_exclude.discard(status.node)
-                            self.console.print(f"    [green]✓ {status.node} is now clean[/green]")
+                            self.console.print(
+                                f"    [green]✓ {status.node} is now clean[/green]"
+                            )
                         else:
-                            self.console.print(f"    [red]✗ {status.node} still dirty[/red]")
+                            self.console.print(
+                                f"    [red]✗ {status.node} still dirty[/red]"
+                            )
                     else:
                         self.console.print(f"    [red]✗ Cleanup failed[/red]")
 
         # Build updated exclude list (dirty + unreachable + unknown)
-        existing_exclude = set(exclude.split(',')) if exclude else set()
+        existing_exclude = set(exclude.split(",")) if exclude else set()
         existing_exclude.update(nodes_to_exclude)
-        updated_exclude = ','.join(sorted(existing_exclude))
+        updated_exclude = ",".join(sorted(existing_exclude))
 
         if unreachable_nodes or unknown_nodes:
             bad = [s.node for s in unreachable_nodes] + [s.node for s in unknown_nodes]
@@ -473,17 +500,17 @@ def select_nodes(
                 f"\n[yellow]⚠ Only {len(clean_nodes)} clean nodes found "
                 f"(need {nodes_needed})[/yellow]"
             )
-            self.console.print("[yellow]Job may wait for additional nodes to become available[/yellow]\n")
-        else:
             self.console.print(
-                "\n[red]❌ No clean nodes available[/red]"
+                "[yellow]Job may wait for additional nodes to become available[/yellow]\n"
             )
+        else:
+            self.console.print("\n[red]❌ No clean nodes available[/red]")
             self.console.print(
                 "[yellow]Recommendation: Wait for nodes to be cleaned or run manual cleanup[/yellow]\n"
             )
 
         return clean_nodes, updated_exclude
-    
+
     def _extract_section(self, text: str, start_marker: str, end_marker: str) -> str:
         """Extract section between markers."""
         try:
@@ -492,17 +519,17 @@ def _extract_section(self, text: str, start_marker: str, end_marker: str) -> str
             return text[start:end].strip()
         except ValueError:
             return ""
-    
+
     def _display_status_table(self, statuses: List[NodeStatus]):
         """Display node status in a table."""
         table = Table(title="Node Health Status")
-        
+
         table.add_column("Node", style="cyan", no_wrap=True)
         table.add_column("Health", style="bold")
         table.add_column("Memory Used", justify="right")
         table.add_column("Processes", justify="right")
         table.add_column("Notes", style="dim")
-        
+
         for status in statuses:
             health_style = {
                 NodeHealth.CLEAN: "green",
@@ -510,18 +537,24 @@ def _display_status_table(self, statuses: List[NodeStatus]):
                 NodeHealth.UNREACHABLE: "red",
                 NodeHealth.UNKNOWN: "dim",
             }[status.health]
-            
+
             health_text = {
                 NodeHealth.CLEAN: "✓ Clean",
                 NodeHealth.DIRTY: "⚠ Dirty",
                 NodeHealth.UNREACHABLE: "✗ Unreachable",
                 NodeHealth.UNKNOWN: "? Unknown",
             }[status.health]
-            
-            memory_text = f"{status.gpu_memory_used_gb:.0f} GB" if status.gpu_memory_used_gb > 0 else "-"
-            processes_text = str(status.process_count) if status.process_count > 0 else "-"
+
+            memory_text = (
+                f"{status.gpu_memory_used_gb:.0f} GB"
+                if status.gpu_memory_used_gb > 0
+                else "-"
+            )
+            processes_text = (
+                str(status.process_count) if status.process_count > 0 else "-"
+            )
             notes = status.error_message if status.error_message else ""
-            
+
             table.add_row(
                 status.node,
                 f"[{health_style}]{health_text}[/{health_style}]",
@@ -529,12 +562,14 @@ def _display_status_table(self, statuses: List[NodeStatus]):
                 processes_text,
                 notes,
             )
-        
+
         self.console.print(table)
         self.console.print()
 
     @staticmethod
-    def cancel_health_check_jobs(job_name: Optional[str], console: Optional[Console] = None) -> None:
+    def cancel_health_check_jobs(
+        job_name: Optional[str], console: Optional[Console] = None
+    ) -> None:
         """
         Cancel any SLURM jobs created by the node health check (srun invocations).
 
@@ -568,6 +603,8 @@ def cancel_health_check_jobs(job_name: Optional[str], console: Optional[Console]
                         timeout=5,
                     )
             if job_ids and _console:
-                _console.print(f"[dim]Cancelled {len(job_ids)} health-check job(s)[/dim]")
+                _console.print(
+                    f"[dim]Cancelled {len(job_ids)} health-check job(s)[/dim]"
+                )
         except Exception:
             pass
diff --git a/src/madengine/deployment/templates/kubernetes/configmap.yaml.j2 b/src/madengine/deployment/templates/kubernetes/configmap.yaml.j2
index 4b782832..21d28ed2 100644
--- a/src/madengine/deployment/templates/kubernetes/configmap.yaml.j2
+++ b/src/madengine/deployment/templates/kubernetes/configmap.yaml.j2
@@ -35,4 +35,3 @@ data:
 {{ script_content | indent(4, first=True) }}
   {% endfor %}
   {% endif %}
-
diff --git a/src/madengine/deployment/templates/kubernetes/job.yaml.j2 b/src/madengine/deployment/templates/kubernetes/job.yaml.j2
index 320d049f..bbfebdc6 100644
--- a/src/madengine/deployment/templates/kubernetes/job.yaml.j2
+++ b/src/madengine/deployment/templates/kubernetes/job.yaml.j2
@@ -38,14 +38,14 @@ spec:
       {% if host_ipc %}
       hostIPC: true
       {% endif %}
-      
+
       {% if image_pull_secrets and image_pull_secrets|length > 0 %}
       imagePullSecrets:
       {% for ips in image_pull_secrets %}
       - name: {{ ips.name }}
       {% endfor %}
       {% endif %}
-      
+
       # Init container extracts madengine scripts from package
       initContainers:
       - name: extract-scripts
@@ -55,7 +55,7 @@ spec:
           - |
             set -e
             echo "=== Extracting madengine scripts ==="
-            
+
             # Extract common scripts from ConfigMap (since madengine not installed in container)
             {% if common_script_contents %}
             echo "Extracting common scripts from ConfigMap..."
@@ -68,7 +68,7 @@ spec:
             {% else %}
             echo "No common scripts to extract"
             {% endif %}
-            
+
             # Copy K8s data provider script from ConfigMap if it exists
             if [ -f /config/data_provider.sh ]; then
                 echo "Copying data_provider.sh to /workspace/data_provider.sh"
@@ -76,7 +76,7 @@ spec:
                 chmod +x /workspace/data_provider.sh
                 echo "✓ Copied K8s data provider script"
             fi
-            
+
             # Extract model scripts directory (all .sh, .py, and .json files)
             {% if model_scripts_contents %}
             echo "Extracting model scripts directory..."
@@ -97,7 +97,7 @@ spec:
             {% else %}
             echo "Warning: No model scripts configured"
             {% endif %}
-            
+
             echo "✓ Script extraction complete"
         volumeMounts:
         - name: workspace
@@ -105,7 +105,7 @@ spec:
         - name: config
           mountPath: /config
           readOnly: true
-      
+
       # Main container runs benchmark
       containers:
       - name: {{ main_container_name }}
@@ -124,7 +124,7 @@ spec:
             echo "Launcher: {{ launcher_type }}"
             {% endif %}
             echo "==================================================================="
-            
+
             # Copy config files from ConfigMap to workspace
             cp /config/build_manifest.json /workspace/
             {% if include_credential_in_configmap %}
@@ -135,14 +135,14 @@ spec:
             echo '{}' > /workspace/credential.json
             {% endif %}
             cp /config/data.json /workspace/ 2>/dev/null || true
-            
+
             # GPU Information
             if command -v rocm-smi &> /dev/null; then
                 echo ""
                 echo "=== AMD GPU Information ==="
                 rocm-smi || true
             fi
-            
+
             # Set GPU visibility for ROCm/CUDA
             # CRITICAL: Ray (vLLM, SGLang) requires ONLY ONE visibility variable
             # - AMD GPUs: Use ONLY HIP_VISIBLE_DEVICES
@@ -174,13 +174,13 @@ spec:
             export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-{{ gpu_visibility }}}
             {% endif %}
             export MAD_SYSTEM_GPU_ARCHITECTURE={{ gpu_architecture }}
-            
+
             # K8s environment
             export MAD_K8S_POD_NAME=$HOSTNAME
             export MAD_K8S_NAMESPACE={{ namespace }}
             export MAD_K8S_JOB=true
             export MAD_DEPLOYMENT_TYPE=kubernetes
-            
+
             {% if launcher_type == "torchrun" or launcher_type == "deepspeed" or launcher_type == "megatron" or launcher_type == "primus" or launcher_type == "torchtitan" %}
             # {{ launcher_type }} distributed environment (auto-configured from K8s)
             {% if nnodes > 1 %}
@@ -192,7 +192,7 @@ spec:
             export JOB_COMPLETION_INDEX=0
             {% endif %}
             {% endif %}
-            
+
             # Data provider environment variables
             {% if data_config %}
             echo ""
@@ -203,7 +203,7 @@ spec:
             {% endfor %}
             echo "✓ Data environment configured for: {{ data_config.data_name }}"
             {% endif %}
-            
+
             # Tools configuration environment variables
             {% if tools_config %}
             echo ""
@@ -218,14 +218,14 @@ spec:
             {% endfor %}
             echo "✓ Tools configuration applied"
             {% endif %}
-            
+
             {% if launcher_command %}
             # Launcher-based execution with tools
             echo ""
             echo "=== Starting benchmark with {{ launcher_type }} ==="
-            
+
             cd /workspace
-            
+
             # Download data if data provider is configured
             {% if data_provider_script and data_config %}
             echo ""
@@ -233,14 +233,14 @@ spec:
             echo "Data name: {{ data_config.data_name }}"
             echo "Source: {{ data_config.source_url }}"
             echo "Target: {{ data_config.datahome }}"
-            
+
             # Use K8s data provider script (loaded from ConfigMap)
             if [ -f /workspace/data_provider.sh ]; then
                 bash /workspace/data_provider.sh \
                   "{{ data_config.data_name }}" \
                   "{{ data_config.source_url }}" \
                   "{{ data_config.datahome }}"
-                
+
                 # Source metrics if available
                 if [ -f /tmp/mad_metrics.env ]; then
                     source /tmp/mad_metrics.env
@@ -251,7 +251,7 @@ spec:
                 exit 1
             fi
             {% endif %}
-            
+
             # Run pre-scripts (like local execution)
             {% if pre_scripts %}
             echo ""
@@ -269,7 +269,7 @@ spec:
             {% else %}
             echo "No pre-scripts configured"
             {% endif %}
-            
+
             # Clear MIOpen cache to prevent "Duplicate ID" warnings
             echo ""
             echo "=== Clearing MIOpen cache ==="
@@ -278,10 +278,10 @@ spec:
                 echo "✓ Cleared MIOpen cache directory"
             fi
             mkdir -p "${MIOPEN_USER_DB_PATH:-/tmp/.miopen}"
-            
+
             # Primus: experiment YAMLs are in the ConfigMap as Primus/examples/... and extracted
             # to /workspace/Primus (see madengine _bundle_primus_k8s_examples_overlay); PRIMUS_ROOT=/workspace/Primus.
-            
+
             # Create wrapper script for launcher
             echo ""
             echo "=== Running model benchmark with launcher ==="
@@ -290,7 +290,7 @@ spec:
             {{ launcher_command | indent(12, first=False) }}
             LAUNCHER_EOF
             chmod +x /tmp/run_launcher.sh
-            
+
             {% if tools_config and tools_config|length > 0 %}
             # Run with profiling tools
             {%   for tool in tools_config %}
@@ -299,7 +299,7 @@ spec:
             {%     endif %}
             {%   endfor %}
             {% endif %}
-            
+
             # Execute launcher with tool chain
             MODEL_START_TIME=$(date +%s.%N)
             {% if launcher_tool_chain and launcher_tool_chain != "bash /tmp/run_launcher.sh" %}
@@ -311,7 +311,7 @@ spec:
             MODEL_END_TIME=$(date +%s.%N)
             MODEL_DURATION=$(awk "BEGIN {printf \"%.6f\", $MODEL_END_TIME - $MODEL_START_TIME}")
             echo "test_duration: ${MODEL_DURATION}s"
-            
+
             # Run post-scripts (like local execution)
             {% if post_scripts %}
             echo ""
@@ -329,12 +329,12 @@ spec:
             {% else %}
             echo "No post-scripts configured"
             {% endif %}
-            
+
             # Copy artifacts to PVC shared storage (always enabled)
             echo ""
             echo "=== Copying artifacts to PVC storage ==="
             mkdir -p /results/${HOSTNAME}
-            
+
             # Copy performance results
             if [ -f "perf.csv" ]; then
                 cp perf.csv /results/${HOSTNAME}/perf.csv
@@ -363,13 +363,13 @@ spec:
                 fi
             fi
             {% endif %}
-            
+
             # Copy environment details
             if ls *_env.csv 1> /dev/null 2>&1; then
                 cp *_env.csv /results/${HOSTNAME}/
                 echo "✓ Copied environment CSV files"
             fi
-            
+
             # Copy profiling outputs (rocprof, rocprofv3)
             if ls results* 1> /dev/null 2>&1; then
                 cp -r results* /results/${HOSTNAME}/ 2>/dev/null || true
@@ -391,7 +391,7 @@ spec:
                 cp -r rocm_trace_lite_output /results/${HOSTNAME}/ 2>/dev/null || true
                 echo "✓ Copied rocm_trace_lite_output"
             fi
-            
+
             # Copy tool-specific outputs
             if ls gpu_info_*.csv 1> /dev/null 2>&1; then
                 cp gpu_info_*.csv /results/${HOSTNAME}/
@@ -405,15 +405,15 @@ spec:
                 cp prof.csv /results/${HOSTNAME}/
                 echo "✓ Copied prof.csv"
             fi
-            
+
             echo "✓ All artifacts copied to PVC: /results/${HOSTNAME}/"
-            
+
             echo "=== Benchmark job completed with exit code $MODEL_EXIT_CODE ==="
             exit $MODEL_EXIT_CODE
             {% else %}
             # Direct script execution
             cd /workspace
-            
+
             # Download data if data provider is configured
             {% if data_provider_script and data_config %}
             echo ""
@@ -421,14 +421,14 @@ spec:
             echo "Data name: {{ data_config.data_name }}"
             echo "Source: {{ data_config.source_url }}"
             echo "Target: {{ data_config.datahome }}"
-            
+
             # Use K8s data provider script (loaded from ConfigMap)
             if [ -f /workspace/data_provider.sh ]; then
                 bash /workspace/data_provider.sh \
                   "{{ data_config.data_name }}" \
                   "{{ data_config.source_url }}" \
                   "{{ data_config.datahome }}"
-                
+
                 # Source metrics if available
                 if [ -f /tmp/mad_metrics.env ]; then
                     source /tmp/mad_metrics.env
@@ -439,7 +439,7 @@ spec:
                 exit 1
             fi
             {% endif %}
-            
+
             # Run pre-scripts (like local execution)
             {% if pre_scripts %}
             echo ""
@@ -457,7 +457,7 @@ spec:
             {% else %}
             echo "No pre-scripts configured"
             {% endif %}
-            
+
             # Clear MIOpen cache to prevent "Duplicate ID" warnings
             echo ""
             echo "=== Clearing MIOpen cache ==="
@@ -466,7 +466,7 @@ spec:
                 echo "✓ Cleared MIOpen cache directory"
             fi
             mkdir -p "${MIOPEN_USER_DB_PATH:-/tmp/.miopen}"
-            
+
             # Run main model script
             echo ""
             echo "=== Running model benchmark script ==="
@@ -479,7 +479,7 @@ spec:
                 {%     endif %}
                 {%   endfor %}
                 {% endif %}
-                
+
                 # Execute script with tool chain
                 MODEL_START_TIME=$(date +%s.%N)
                 {% if direct_script_tool_chain and direct_script_tool_chain != "bash " ~ model_script %}
@@ -500,7 +500,7 @@ spec:
                 ls -la /workspace/scripts/ 2>/dev/null || echo "scripts/ directory not found"
                 exit 1
             fi
-            
+
             # Run post-scripts (like local execution)
             {% if post_scripts %}
             echo ""
@@ -518,12 +518,12 @@ spec:
             {% else %}
             echo "No post-scripts configured"
             {% endif %}
-            
+
             # Copy artifacts to PVC shared storage (always enabled)
             echo ""
             echo "=== Copying artifacts to PVC storage ==="
             mkdir -p /results/${HOSTNAME}
-            
+
             # Copy performance results
             if [ -f "perf.csv" ]; then
                 cp perf.csv /results/${HOSTNAME}/perf.csv
@@ -552,13 +552,13 @@ spec:
                 fi
             fi
             {% endif %}
-            
+
             # Copy environment details
             if ls *_env.csv 1> /dev/null 2>&1; then
                 cp *_env.csv /results/${HOSTNAME}/
                 echo "✓ Copied environment CSV files"
             fi
-            
+
             # Copy profiling outputs (rocprof, rocprofv3)
             if ls results* 1> /dev/null 2>&1; then
                 cp -r results* /results/${HOSTNAME}/ 2>/dev/null || true
@@ -580,13 +580,13 @@ spec:
                 cp -r rocm_trace_lite_output /results/${HOSTNAME}/ 2>/dev/null || true
                 echo "✓ Copied rocm_trace_lite_output"
             fi
-            
+
             # Copy GPU profiler outputs
             if ls gpu_info_*.csv 1> /dev/null 2>&1; then
                 cp gpu_info_*.csv /results/${HOSTNAME}/
                 echo "✓ Copied GPU profiler outputs"
             fi
-            
+
             # Copy library trace outputs
             if ls *_trace_output.csv 1> /dev/null 2>&1; then
                 cp *_trace_output.csv /results/${HOSTNAME}/
@@ -596,20 +596,20 @@ spec:
                 cp library_trace.csv /results/${HOSTNAME}/library_trace.csv
                 echo "✓ Copied library_trace.csv"
             fi
-            
+
             # Copy tracing outputs
             if ls trace.* 1> /dev/null 2>&1; then
                 cp trace.* /results/${HOSTNAME}/ 2>/dev/null || true
                 echo "✓ Copied tracing files"
             fi
-            
+
             echo "✓ All artifacts copied to PVC: /results/${HOSTNAME}/"
-            
+
             echo ""
             echo "=== Benchmark job completed with exit code ${MODEL_EXIT_CODE:-0} ==="
             exit ${MODEL_EXIT_CODE:-0}
             {% endif %}
-        
+
         resources:
           requests:
             {{ gpu_resource_name }}: "{{ gpu_count }}"
@@ -619,13 +619,13 @@ spec:
             {{ gpu_resource_name }}: "{{ gpu_count }}"
             memory: "{{ memory_limit }}"
             cpu: "{{ cpu_limit }}"
-        
+
         env:
         {% for key, value in env_vars.items() %}
         - name: {{ key }}
           value: "{{ value }}"
         {% endfor %}
-        
+
         volumeMounts:
         - name: workspace
           mountPath: /workspace
@@ -647,7 +647,7 @@ spec:
           mountPath: /data
           readOnly: false  # Must be writable for data provider downloads
         {% endif %}
-        
+
         {% if privileged_profiling %}
         securityContext:
           capabilities:
@@ -656,7 +656,7 @@ spec:
           seccompProfile:
             type: Unconfined
         {% endif %}
-      
+
       {% if tolerations %}
       tolerations:
       {% for toleration in tolerations %}
@@ -672,7 +672,7 @@ spec:
         {% endif %}
       {% endfor %}
       {% endif %}
-      
+
       volumes:
       - name: workspace
         emptyDir: {}
@@ -699,4 +699,3 @@ spec:
         persistentVolumeClaim:
           claimName: {{ data_pvc }}
       {% endif %}
-
diff --git a/src/madengine/deployment/templates/kubernetes/pvc-data.yaml.j2 b/src/madengine/deployment/templates/kubernetes/pvc-data.yaml.j2
index c5bc8396..e6aa8d00 100644
--- a/src/madengine/deployment/templates/kubernetes/pvc-data.yaml.j2
+++ b/src/madengine/deployment/templates/kubernetes/pvc-data.yaml.j2
@@ -19,4 +19,3 @@ spec:
   {% if storage_class %}
   storageClassName: {{ storage_class }}
   {% endif %}
-
diff --git a/src/madengine/deployment/templates/kubernetes/pvc.yaml.j2 b/src/madengine/deployment/templates/kubernetes/pvc.yaml.j2
index fe1395e0..953a53c8 100644
--- a/src/madengine/deployment/templates/kubernetes/pvc.yaml.j2
+++ b/src/madengine/deployment/templates/kubernetes/pvc.yaml.j2
@@ -16,4 +16,3 @@ spec:
   {% if storage_class %}
   storageClassName: {{ storage_class }}
   {% endif %}
-
diff --git a/src/madengine/deployment/templates/kubernetes/service.yaml.j2 b/src/madengine/deployment/templates/kubernetes/service.yaml.j2
index e02836ee..e5ee9f89 100644
--- a/src/madengine/deployment/templates/kubernetes/service.yaml.j2
+++ b/src/madengine/deployment/templates/kubernetes/service.yaml.j2
@@ -17,4 +17,3 @@ spec:
     targetPort: {{ port }}
     protocol: TCP
   {% endfor %}
-
diff --git a/src/madengine/deployment/templates/slurm/job.sh.j2 b/src/madengine/deployment/templates/slurm/job.sh.j2
index 5f8e8266..3d1525d8 100644
--- a/src/madengine/deployment/templates/slurm/job.sh.j2
+++ b/src/madengine/deployment/templates/slurm/job.sh.j2
@@ -215,11 +215,11 @@ echo "Verifying madengine availability..."
 if command -v madengine >/dev/null 2>&1; then
     MAD_CLI_VERSION=$(madengine --version 2>&1 | head -n1 || echo "unknown")
     MAD_CLI_PATH=$(which madengine 2>/dev/null || echo "unknown")
-    
+
     echo "  ✓ madengine available"
     echo "  Version: $MAD_CLI_VERSION"
     echo "  Path: $MAD_CLI_PATH"
-    
+
     # Verify it's executable
     if madengine --help >/dev/null 2>&1; then
         export MAD_CLI_COMMAND="madengine"
@@ -263,15 +263,15 @@ if 'deployment_config' in manifest:
     gpus_per_node = None
     if 'slurm' in manifest['deployment_config']:
         gpus_per_node = manifest['deployment_config']['slurm'].get('gpus_per_node')
-    
+
     # Set to 'docker' instead of 'local' to force container execution
     manifest['deployment_config']['target'] = 'docker'
-    
+
     # Remove scheduler configs (but keep built_images!)
     manifest['deployment_config'].pop('slurm', None)
     manifest['deployment_config'].pop('k8s', None)
     manifest['deployment_config'].pop('kubernetes', None)
-    
+
     if gpus_per_node:
         manifest['deployment_config']['gpus_per_node'] = gpus_per_node
 
@@ -490,11 +490,11 @@ echo "Verifying madengine availability..."
 if command -v madengine >/dev/null 2>&1; then
     MAD_CLI_VERSION=$(madengine --version 2>&1 | head -n1 || echo "unknown")
     MAD_CLI_PATH=$(which madengine 2>/dev/null || echo "unknown")
-    
+
     echo "✓ madengine available"
     echo "  Version: $MAD_CLI_VERSION"
     echo "  Path: $MAD_CLI_PATH"
-    
+
     # Verify it's executable
     if madengine --help >/dev/null 2>&1; then
         echo "  ✓ Verified: madengine is functional"
@@ -542,15 +542,15 @@ if 'deployment_config' in manifest:
     gpus_per_node = None
     if 'slurm' in manifest['deployment_config']:
         gpus_per_node = manifest['deployment_config']['slurm'].get('gpus_per_node')
-    
+
     # Set to 'docker' instead of 'local' to force container execution
     manifest['deployment_config']['target'] = 'docker'
-    
+
     # Remove scheduler configs (but keep built_images!)
     manifest['deployment_config'].pop('slurm', None)
     manifest['deployment_config'].pop('k8s', None)
     manifest['deployment_config'].pop('kubernetes', None)
-    
+
     if gpus_per_node:
         manifest['deployment_config']['gpus_per_node'] = gpus_per_node
 
@@ -819,4 +819,3 @@ else
 fi
 
 exit $EXIT_CODE
-
diff --git a/src/madengine/execution/__init__.py b/src/madengine/execution/__init__.py
index c7be268e..a687f394 100644
--- a/src/madengine/execution/__init__.py
+++ b/src/madengine/execution/__init__.py
@@ -9,4 +9,3 @@
 from .container_runner import ContainerRunner
 
 __all__ = ["ContainerRunner"]
-
diff --git a/src/madengine/execution/container_runner.py b/src/madengine/execution/container_runner.py
index 2ffc8a31..1074f09e 100644
--- a/src/madengine/execution/container_runner.py
+++ b/src/madengine/execution/container_runner.py
@@ -29,7 +29,10 @@
     update_perf_csv,
     flatten_tags,
 )
-from madengine.reporting.update_perf_super import update_perf_super_json, update_perf_super_csv
+from madengine.reporting.update_perf_super import (
+    update_perf_super_json,
+    update_perf_super_csv,
+)
 from madengine.utils.gpu_config import resolve_runtime_gpus
 from madengine.utils.config_parser import ConfigParser
 from madengine.utils.path_utils import scripts_base_dir_from
@@ -86,8 +89,7 @@ def _sh(cmd: str) -> str:
         host_install_type = (
             "therock"
             if _host_rocm_path.is_dir() and is_therock_tree(_host_rocm_path)
-            else "apt install" if _host_rocm_path.is_dir()
-            else "unknown"
+            else "apt install" if _host_rocm_path.is_dir() else "unknown"
         )
         try:
             host_rocm_ver = context._get_tool_manager().get_version() or "unknown"
@@ -108,14 +110,13 @@ def _sh(cmd: str) -> str:
 
         # ROCm root: prefer rocm-sdk, then ROCM_PATH env, then /opt/rocm
         ctr_rocm_root = _sh(
-            "rocm-sdk path --root 2>/dev/null "
-            "|| echo \"${ROCM_PATH:-/opt/rocm}\""
+            "rocm-sdk path --root 2>/dev/null " '|| echo "${ROCM_PATH:-/opt/rocm}"'
         )
 
         # ROCm version: prefer rocm-sdk, then .info/version, then rocminfo
         ctr_rocm_ver = _sh(
             "rocm-sdk version 2>/dev/null "
-            "|| cat \"${ROCM_PATH:-/opt/rocm}/.info/version\" 2>/dev/null "
+            '|| cat "${ROCM_PATH:-/opt/rocm}/.info/version" 2>/dev/null '
             "|| rocminfo 2>/dev/null | grep -i 'ROCm Version' | head -n1 | sed 's/.*[Vv]ersion:[[:space:]]*//;s/[[:space:]].*//;s/[^0-9.]//g' 2>/dev/null "
             "|| echo unknown"
         )
@@ -129,14 +130,16 @@ def _sh(cmd: str) -> str:
         # ── Host side ──────────────────────────────────────────────
         def _host_sh(cmd: str) -> str:
             try:
-                return subprocess.check_output(cmd, shell=True, stderr=subprocess.DEVNULL, text=True).strip()
+                return subprocess.check_output(
+                    cmd, shell=True, stderr=subprocess.DEVNULL, text=True
+                ).strip()
             except Exception:
                 return "unknown"
 
         host_cuda_root = _host_sh(
             "nvcc --version 2>/dev/null | sed -n 's/.*release \\([0-9][0-9.]*\\).*/\\1/p' | head -1 | "
             "xargs -I{} dirname $(which nvcc 2>/dev/null) 2>/dev/null | xargs dirname 2>/dev/null "
-            "|| echo \"${CUDA_PATH:-${CUDA_HOME:-/usr/local/cuda}}\""
+            '|| echo "${CUDA_PATH:-${CUDA_HOME:-/usr/local/cuda}}"'
         )
         host_cuda_ver = _host_sh(
             "nvcc --version 2>/dev/null | sed -n 's/.*release \\([0-9][0-9.]*\\).*/\\1/p' | head -1 "
@@ -147,7 +150,7 @@ def _host_sh(cmd: str) -> str:
         # ── Container side ─────────────────────────────────────────
         ctr_cuda_root = _sh(
             "dirname $(which nvcc 2>/dev/null) 2>/dev/null | xargs dirname 2>/dev/null "
-            "|| echo \"${CUDA_PATH:-${CUDA_HOME:-/usr/local/cuda}}\""
+            '|| echo "${CUDA_PATH:-${CUDA_HOME:-/usr/local/cuda}}"'
         )
         ctr_cuda_ver = _sh(
             "nvcc --version 2>/dev/null | sed -n 's/.*release \\([0-9][0-9.]*\\).*/\\1/p' | head -1 "
@@ -164,7 +167,9 @@ def _host_sh(cmd: str) -> str:
     rich_console.print(f"[dim]{'=' * 80}[/dim]\n")
 
 
-def _resolve_multiple_results_path(multiple_results: str, model_dir: str) -> typing.Optional[str]:
+def _resolve_multiple_results_path(
+    multiple_results: str, model_dir: str
+) -> typing.Optional[str]:
     """Resolve multiple_results CSV path: try cwd then model_dir. Return first that exists."""
     if not multiple_results:
         return None
@@ -199,9 +204,7 @@ def _cp_model_dir_file_to_cwd_cmd(model_dir: str, relative_path: str) -> str:
     """``cp --`` from ``model_dir/relative`` to ``.`` with quoted paths (no injection)."""
     rel = (relative_path or "").strip()
     src = os.path.normpath(os.path.join(model_dir, rel)).replace("\\", "/")
-    return (
-        f"cp -- {_bash_quote_path(src)} {_bash_quote_path('.')} 2>/dev/null || true"
-    )
+    return f"cp -- {_bash_quote_path(src)} {_bash_quote_path('.')} 2>/dev/null || true"
 
 
 class ContainerRunner:
@@ -272,36 +275,42 @@ def create_run_details_dict(
 
         # Resolve GPU count using hierarchical resolution
         resolved_gpu_count = resolve_runtime_gpus(model_info, self.additional_context)
-        
+
         # Convert -1 (all GPUs) to actual system GPU count for accurate reporting
         if resolved_gpu_count == -1 and self.context:
             try:
-                system_ngpus = int(self.context.ctx["docker_env_vars"]["MAD_SYSTEM_NGPUS"])
+                system_ngpus = int(
+                    self.context.ctx["docker_env_vars"]["MAD_SYSTEM_NGPUS"]
+                )
                 resolved_gpu_count = system_ngpus
-                print(f"ℹ️  Converted n_gpus=-1 to actual system GPU count: {system_ngpus}")
+                print(
+                    f"ℹ️  Converted n_gpus=-1 to actual system GPU count: {system_ngpus}"
+                )
             except (KeyError, ValueError, TypeError):
                 # If system GPU count not available, keep -1
                 pass
-        
+
         # Determine number of nodes and GPUs per node
         # Priority: 1. SLURM env vars, 2. additional_context, 3. model_info, 4. default (1)
         nnodes = "1"  # Default for local execution
         gpus_per_node = str(resolved_gpu_count)
-        
+
         # Check for SLURM multi-node environment
         if os.environ.get("MAD_DEPLOYMENT_TYPE") == "slurm":
             # Get from SLURM environment variables (most accurate for SLURM jobs)
             slurm_nnodes = os.environ.get("NNODES") or os.environ.get("SLURM_NNODES")
-            slurm_gpus_per_node = os.environ.get("GPUS_PER_NODE") or os.environ.get("SLURM_GPUS_PER_NODE")
-            
+            slurm_gpus_per_node = os.environ.get("GPUS_PER_NODE") or os.environ.get(
+                "SLURM_GPUS_PER_NODE"
+            )
+
             if slurm_nnodes:
                 nnodes = str(slurm_nnodes)
                 print(f"ℹ️  Detected SLURM multi-node: {nnodes} nodes")
-            
+
             if slurm_gpus_per_node:
                 gpus_per_node = str(slurm_gpus_per_node)
                 print(f"ℹ️  GPUs per node: {gpus_per_node}")
-        
+
         # Fallback to additional_context (for non-SLURM or if env vars not set)
         if nnodes == "1" and self.additional_context:
             slurm_config = self.additional_context.get("slurm", {})
@@ -312,43 +321,43 @@ def create_run_details_dict(
                     nnodes = str(ctx_nodes)
                 if ctx_gpus:
                     gpus_per_node = str(ctx_gpus)
-        
+
         # Final fallback to model_info
         if nnodes == "1":
             nnodes = model_info.get("nnodes", "1")
-        
+
         # Calculate total GPUs
         try:
             total_gpus = int(nnodes) * int(gpus_per_node)
         except (ValueError, TypeError):
             total_gpus = resolved_gpu_count
-        
+
         # Extract launcher from multiple sources in priority order:
         # 1. additional_context (passed via --additional-context CLI arg)
         # 2. model_info distributed config (in models.json)
         # 3. MAD_LAUNCHER environment variable
         # 4. Default to 'docker' for local deployments
         launcher = ""
-        
+
         # Check additional_context first (highest priority)
         if self.additional_context:
             distributed_config = self.additional_context.get("distributed", {})
             launcher = distributed_config.get("launcher", "")
             if launcher:
                 print(f"🚀 Launcher from additional_context: {launcher}")
-        
+
         # Check model_info distributed config
         if not launcher and model_info.get("distributed"):
             launcher = model_info["distributed"].get("launcher", "")
             if launcher:
                 print(f"🚀 Launcher from model_info: {launcher}")
-        
+
         # Fallback to environment variable
         if not launcher:
             launcher = os.environ.get("MAD_LAUNCHER", "")
             if launcher:
                 print(f"🚀 Launcher from MAD_LAUNCHER env: {launcher}")
-        
+
         # Apply deployment-specific defaults if no launcher specified
         deployment_type = os.environ.get("MAD_DEPLOYMENT_TYPE", "local")
         if not launcher:
@@ -363,13 +372,15 @@ def create_run_details_dict(
             elif deployment_type == "local":
                 launcher = "docker"
                 print(f"🚀 Launcher defaulted to 'docker' for local deployment")
-        
+
         # Print final launcher selection
         if launcher:
-            print(f"✅ Final launcher selected: '{launcher}' (deployment_type: {deployment_type})")
+            print(
+                f"✅ Final launcher selected: '{launcher}' (deployment_type: {deployment_type})"
+            )
         else:
             print(f"⚠️  No launcher specified (deployment_type: {deployment_type})")
-        
+
         # Create run details dict with all required fields
         run_details = {
             "model": model_info["name"],
@@ -383,10 +394,14 @@ def create_run_details_dict(
             "docker_file": build_info.get("dockerfile", ""),
             "base_docker": build_info.get("base_docker", ""),
             "docker_sha": build_info.get("docker_sha", ""),
-            "docker_image": run_results.get("docker_image", build_info.get("docker_image", "")),
+            "docker_image": run_results.get(
+                "docker_image", build_info.get("docker_image", "")
+            ),
             "git_commit": run_results.get("git_commit", ""),
             "machine_name": run_results.get("machine_name", ""),
-            "deployment_type": os.environ.get("MAD_DEPLOYMENT_TYPE", "local"),  # local, slurm, etc.
+            "deployment_type": os.environ.get(
+                "MAD_DEPLOYMENT_TYPE", "local"
+            ),  # local, slurm, etc.
             "launcher": launcher,  # Distributed launcher: torchrun, vllm, sglang, deepspeed, etc.
             "gpu_architecture": (
                 (self.context.ctx.get("docker_env_vars") or {}).get(
@@ -420,8 +435,7 @@ def create_run_details_dict(
             scripts_base_dir = scripts_base_dir_from(scripts_path)
             config_parser = ConfigParser(scripts_base_dir=scripts_base_dir)
             run_details["configs"] = config_parser.parse_and_load(
-                model_info.get("args", ""),
-                scripts_path
+                model_info.get("args", ""), scripts_path
             )
         except Exception as e:
             print(f"⚠️  Warning: Could not parse config file: {e}")
@@ -544,40 +558,50 @@ def pull_image(
         if registry and credentials:
             self.login_to_registry(registry, credentials)
 
-        self.rich_console.print(f"\n[bold blue]📥 Starting docker pull from registry...[/bold blue]")
+        self.rich_console.print(
+            f"\n[bold blue]📥 Starting docker pull from registry...[/bold blue]"
+        )
         print(f"📍 Registry: {registry or 'Default'}")
         print(f"🏷️  Image: {registry_image}")
-        
+
         # Force fresh pull on SLURM compute nodes to avoid corrupted cached layers
         # This prevents "permission denied" errors from corrupted image layers
         deployment_type = os.environ.get("MAD_DEPLOYMENT_TYPE", "local")
         in_slurm_job = os.environ.get("MAD_IN_SLURM_JOB", "0") == "1"
-        
+
         if deployment_type == "slurm" and in_slurm_job:
-            print(f"🔄 Using fresh pull policy for SLURM compute node (prevents cached layer corruption)")
+            print(
+                f"🔄 Using fresh pull policy for SLURM compute node (prevents cached layer corruption)"
+            )
             # Remove any existing cached image to force fresh pull
             try:
                 self.console.sh(f"docker rmi -f {registry_image} 2>/dev/null || true")
                 print(f"✓ Removed cached image layers")
             except Exception:
                 pass  # It's okay if image doesn't exist
-        
+
         try:
             self.console.sh(f"docker pull {registry_image}")
 
             if local_name:
                 self.console.sh(f"docker tag {registry_image} {local_name}")
                 print(f"🏷️  Tagged as: {local_name}")
-                self.rich_console.print(f"[bold green]✅ Successfully pulled and tagged image[/bold green]")
+                self.rich_console.print(
+                    f"[bold green]✅ Successfully pulled and tagged image[/bold green]"
+                )
                 self.rich_console.print(f"[dim]{'='*80}[/dim]")
                 return local_name
 
-            self.rich_console.print(f"[bold green]✅ Successfully pulled image:[/bold green] [cyan]{registry_image}[/cyan]")
+            self.rich_console.print(
+                f"[bold green]✅ Successfully pulled image:[/bold green] [cyan]{registry_image}[/cyan]"
+            )
             self.rich_console.print(f"[dim]{'='*80}[/dim]")
             return registry_image
 
         except Exception as e:
-            self.rich_console.print(f"[red]❌ Failed to pull image {registry_image}: {e}[/red]")
+            self.rich_console.print(
+                f"[red]❌ Failed to pull image {registry_image}: {e}[/red]"
+            )
             raise
 
     def get_gpu_arg(self, requested_gpus: str) -> str:
@@ -755,7 +779,7 @@ def apply_tools(
             # Update environment variables (always apply, even if cmd is duplicate)
             if "env_vars" in tool_config:
                 run_env.update(tool_config["env_vars"])
-            
+
             # Only add cmd if it hasn't been added yet
             # This prevents duplicate wrappers like get_library_trace.py
             if "cmd" in tool_config:
@@ -763,13 +787,13 @@ def apply_tools(
                 if cmd not in added_cmds:
                     # Prepend encapsulate cmd
                     pre_encapsulate_post_scripts["encapsulate_script"] = (
-                        cmd
-                        + " "
-                        + pre_encapsulate_post_scripts["encapsulate_script"]
+                        cmd + " " + pre_encapsulate_post_scripts["encapsulate_script"]
                     )
                     added_cmds.add(cmd)
                 else:
-                    print(f"  Note: Command '{cmd}' already added by another tool, skipping duplicate.")
+                    print(
+                        f"  Note: Command '{cmd}' already added by another tool, skipping duplicate."
+                    )
 
     def run_pre_post_script(
         self, model_docker: Docker, model_dir: str, pre_post: typing.List
@@ -861,7 +885,9 @@ def run_container(
         Returns:
             dict: Execution results including performance metrics
         """
-        self.rich_console.print(f"[bold green]🏃 Running model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan] [dim]in container[/dim] [yellow]{docker_image}[/yellow]")
+        self.rich_console.print(
+            f"[bold green]🏃 Running model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan] [dim]in container[/dim] [yellow]{docker_image}[/yellow]"
+        )
 
         # Resolve image: if model-specific image is missing, try shared primus_pretrain image (one build for all configs)
         docker_image = self._resolve_docker_image(docker_image, model_info["name"])
@@ -933,11 +959,11 @@ def run_container(
 
         # Add environment variables
         docker_options += f"--env MAD_MODEL_NAME='{model_info['name']}' "
-        if model_info.get('multiple_results'):
-            docker_options += f"--env MAD_OUTPUT_CSV='{model_info['multiple_results']}' "
-        docker_options += (
-            f"--env JENKINS_BUILD_NUMBER='{get_build_number()}' "
-        )
+        if model_info.get("multiple_results"):
+            docker_options += (
+                f"--env MAD_OUTPUT_CSV='{model_info['multiple_results']}' "
+            )
+        docker_options += f"--env JENKINS_BUILD_NUMBER='{get_build_number()}' "
 
         # Gather data and environment
         run_env = {}
@@ -947,12 +973,14 @@ def run_container(
         # Also check shell environment for SLURM-passed variables
         if "docker_env_vars" not in self.context.ctx:
             self.context.ctx["docker_env_vars"] = {}
-        
+
         # For SLURM jobs, check shell environment and populate additional_context with GPU info
         # This ensures GPU resolution works correctly
         if os.environ.get("MAD_DEPLOYMENT_TYPE") == "slurm":
             if "NPROC_PER_NODE" in os.environ or "GPUS_PER_NODE" in os.environ:
-                gpus_per_node_str = os.environ.get("NPROC_PER_NODE") or os.environ.get("GPUS_PER_NODE")
+                gpus_per_node_str = os.environ.get("NPROC_PER_NODE") or os.environ.get(
+                    "GPUS_PER_NODE"
+                )
                 if gpus_per_node_str:
                     try:
                         gpus = int(gpus_per_node_str)
@@ -962,44 +990,65 @@ def run_container(
                             self.additional_context = {}
                         if "gpus_per_node" not in self.additional_context:
                             self.additional_context["gpus_per_node"] = gpus
-                            print(f"ℹ️  SLURM GPU override: {gpus} GPUs per node (from shell environment)")
+                            print(
+                                f"ℹ️  SLURM GPU override: {gpus} GPUs per node (from shell environment)"
+                            )
                     except ValueError:
                         pass
-        
+
         # List of environment variables to pass from shell to Docker (for SLURM jobs)
         slurm_env_vars = [
-            'MASTER_ADDR', 'MASTER_PORT', 'WORLD_SIZE', 'RANK', 'NODE_RANK',
-            'NNODES', 'NPROC_PER_NODE', 'MAD_MULTI_NODE_RUNNER',
-            'MAD_COLLECT_METRICS', 'NCCL_SOCKET_IFNAME', 'GLOO_SOCKET_IFNAME',
-            'NCCL_DEBUG', 'NCCL_IB_DISABLE', 'NCCL_NET_GDR_LEVEL',
+            "MASTER_ADDR",
+            "MASTER_PORT",
+            "WORLD_SIZE",
+            "RANK",
+            "NODE_RANK",
+            "NNODES",
+            "NPROC_PER_NODE",
+            "MAD_MULTI_NODE_RUNNER",
+            "MAD_COLLECT_METRICS",
+            "NCCL_SOCKET_IFNAME",
+            "GLOO_SOCKET_IFNAME",
+            "NCCL_DEBUG",
+            "NCCL_IB_DISABLE",
+            "NCCL_NET_GDR_LEVEL",
             # Primus launcher (config path and optional CLI extra args)
-            'PRIMUS_CONFIG_PATH', 'PRIMUS_CLI_EXTRA',
+            "PRIMUS_CONFIG_PATH",
+            "PRIMUS_CLI_EXTRA",
             # Rendezvous timeout so all nodes can join after pull
-            'TORCH_ELASTIC_RDZV_TIMEOUT',
+            "TORCH_ELASTIC_RDZV_TIMEOUT",
             # GPU visibility variables for Ray-based launchers (vLLM, SGLang)
             # CRITICAL: These must be passed to Docker for proper GPU device mapping
-            'HIP_VISIBLE_DEVICES', 'ROCR_VISIBLE_DEVICES', 'CUDA_VISIBLE_DEVICES'
+            "HIP_VISIBLE_DEVICES",
+            "ROCR_VISIBLE_DEVICES",
+            "CUDA_VISIBLE_DEVICES",
         ]
-        
+
         # Check shell environment and add to docker_env_vars
         merged_from_env = 0
         for var_name in slurm_env_vars:
             if var_name in os.environ:
                 self.context.ctx["docker_env_vars"][var_name] = os.environ[var_name]
                 merged_from_env += 1
-        
+
         # CRITICAL FIX for rocm/vllm image: Override RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES
         # The rocm/vllm Docker image has RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1 baked in,
         # which tells Ray to IGNORE HIP_VISIBLE_DEVICES. We must explicitly override it.
         # This is only needed if HIP_VISIBLE_DEVICES is set (indicating AMD GPU usage with Ray)
-        if 'HIP_VISIBLE_DEVICES' in self.context.ctx["docker_env_vars"]:
+        if "HIP_VISIBLE_DEVICES" in self.context.ctx["docker_env_vars"]:
             # Set to empty string to disable Ray's behavior of ignoring HIP_VISIBLE_DEVICES
-            self.context.ctx["docker_env_vars"]['RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES'] = ''
-            print("ℹ️  Overriding RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES to enable HIP_VISIBLE_DEVICES")
-        
+            self.context.ctx["docker_env_vars"][
+                "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES"
+            ] = ""
+            print(
+                "ℹ️  Overriding RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES to enable HIP_VISIBLE_DEVICES"
+            )
+
         if merged_from_env > 0:
-            print(f"ℹ️  Inherited {merged_from_env} environment variables from shell for Docker")
-        
+            print(
+                f"ℹ️  Inherited {merged_from_env} environment variables from shell for Docker"
+            )
+
         # Also merge from additional_context if present
         if self.additional_context and "docker_env_vars" in self.additional_context:
             merged_count = 0
@@ -1007,11 +1056,14 @@ def run_container(
                 self.context.ctx["docker_env_vars"][key] = value
                 merged_count += 1
             if merged_count > 0:
-                print(f"ℹ️  Merged {merged_count} environment variables from additional_context")
+                print(
+                    f"ℹ️  Merged {merged_count} environment variables from additional_context"
+                )
 
-        if self.context and str(self.context.ctx.get("gpu_vendor", "")).upper().find(
-            "AMD"
-        ) != -1:
+        if (
+            self.context
+            and str(self.context.ctx.get("gpu_vendor", "")).upper().find("AMD") != -1
+        ):
             from madengine.utils.rocm_path_resolver import finalize_container_rocm_path
 
             # Determine whether the user explicitly supplied ROCM_PATH for the container.
@@ -1020,13 +1072,11 @@ def run_container(
             # If they did not, clear any ROCM_PATH left from a previous model run so
             # finalize always re-resolves for the current docker_image (OCI config →
             # in-image probe → /opt/rocm default).
-            user_supplied_rocm_path = (
-                str(
-                    (self.additional_context or {})
-                    .get("docker_env_vars", {})
-                    .get("ROCM_PATH", "")
-                ).strip()
-            )
+            user_supplied_rocm_path = str(
+                (self.additional_context or {})
+                .get("docker_env_vars", {})
+                .get("ROCM_PATH", "")
+            ).strip()
             if not user_supplied_rocm_path:
                 self.context.ctx["docker_env_vars"].pop("ROCM_PATH", None)
 
@@ -1056,7 +1106,9 @@ def run_container(
         # Add system environment collection script to pre_scripts
         # Context can explicitly disable via gen_sys_env_details: false in additional_context
         ctx_sys_env = self.context.ctx.get("gen_sys_env_details")
-        should_collect_sys_env = ctx_sys_env if ctx_sys_env is not None else generate_sys_env_details
+        should_collect_sys_env = (
+            ctx_sys_env if ctx_sys_env is not None else generate_sys_env_details
+        )
         if should_collect_sys_env:
             self.gather_system_env_details(
                 pre_encapsulate_post_scripts, model_info["name"]
@@ -1067,19 +1119,28 @@ def run_container(
         resolved_gpu_count = resolve_runtime_gpus(model_info, self.additional_context)
         docker_options += self.get_gpu_arg(str(resolved_gpu_count))
         docker_options += self.get_cpu_arg()
-        
+
         # Filter out MIOPEN_USER_DB_PATH from run_env if it exists
         # It should be passed via docker_env_vars in context instead
         if "MIOPEN_USER_DB_PATH" in run_env:
             del run_env["MIOPEN_USER_DB_PATH"]
-            print("ℹ️  Removed MIOPEN_USER_DB_PATH from run_env (will use context.docker_env_vars)")
-        
+            print(
+                "ℹ️  Removed MIOPEN_USER_DB_PATH from run_env (will use context.docker_env_vars)"
+            )
+
         # Add MIOPEN_USER_DB_PATH from shell environment to context.docker_env_vars
         # This is set by SLURM script with ${LOCAL_RANK} variable for per-process paths
-        if "MIOPEN_USER_DB_PATH" in os.environ and "MIOPEN_USER_DB_PATH" not in self.context.ctx["docker_env_vars"]:
-            self.context.ctx["docker_env_vars"]["MIOPEN_USER_DB_PATH"] = os.environ["MIOPEN_USER_DB_PATH"]
-            print(f"ℹ️  Added MIOPEN_USER_DB_PATH to docker_env_vars: {os.environ['MIOPEN_USER_DB_PATH']}")
-        
+        if (
+            "MIOPEN_USER_DB_PATH" in os.environ
+            and "MIOPEN_USER_DB_PATH" not in self.context.ctx["docker_env_vars"]
+        ):
+            self.context.ctx["docker_env_vars"]["MIOPEN_USER_DB_PATH"] = os.environ[
+                "MIOPEN_USER_DB_PATH"
+            ]
+            print(
+                f"ℹ️  Added MIOPEN_USER_DB_PATH to docker_env_vars: {os.environ['MIOPEN_USER_DB_PATH']}"
+            )
+
         docker_options += self.get_env_arg(run_env)
         docker_options += self.get_mount_arg(mount_datapaths)
         docker_options += f" {model_info.get('additional_docker_run_options', '')}"
@@ -1088,7 +1149,7 @@ def run_container(
         base_container_name = "container_" + re.sub(
             ".*:", "", docker_image.replace("/", "_").replace(":", "_")
         )
-        
+
         # For multi-node SLURM jobs, add node rank to avoid name conflicts
         node_rank = os.environ.get("SLURM_PROCID") or os.environ.get("RANK")
         if node_rank is not None:
@@ -1098,7 +1159,9 @@ def run_container(
 
         print(f"Docker options: {docker_options}")
 
-        self.rich_console.print(f"\n[bold blue]🏃 Starting Docker container execution...[/bold blue]")
+        self.rich_console.print(
+            f"\n[bold blue]🏃 Starting Docker container execution...[/bold blue]"
+        )
         print(f"🏷️  Image: {docker_image}")
         print(f"📦 Container: {container_name}")
         print(f"📝 Log file: {log_file_path}")
@@ -1113,7 +1176,7 @@ def run_container(
                 ), redirect_stderr(PythonicTee(outlog, self.live_output)):
                     # set timeout (print inside log redirection so it appears in log file)
                     print(f"⏰ Setting timeout to {str(timeout)} seconds.")
-                    
+
                     with Timeout(timeout):
                         model_docker = Docker(
                             docker_image,
@@ -1142,7 +1205,9 @@ def run_container(
                             model_docker.sh("/usr/bin/nvidia-smi || true")
 
                         # Print host vs container environment summary table
-                        _print_run_env_table(gpu_vendor, self.context, model_docker, self.rich_console)
+                        _print_run_env_table(
+                            gpu_vendor, self.context, model_docker, self.rich_console
+                        )
 
                         # Prepare model directory
                         model_dir = "run_directory"
@@ -1254,20 +1319,33 @@ def run_container(
                             and self.data
                         ):
                             self.data.prepare_data(model_info["data"], model_docker)
-                            
+
                             # Capture data provider information from selected_data_provider
                             if (
                                 hasattr(self.data, "selected_data_provider")
                                 and self.data.selected_data_provider
                             ):
                                 if "dataname" in self.data.selected_data_provider:
-                                    run_results["dataname"] = self.data.selected_data_provider["dataname"]
-                                if "data_provider_type" in self.data.selected_data_provider:
-                                    run_results["data_provider_type"] = self.data.selected_data_provider["data_provider_type"]
+                                    run_results["dataname"] = (
+                                        self.data.selected_data_provider["dataname"]
+                                    )
+                                if (
+                                    "data_provider_type"
+                                    in self.data.selected_data_provider
+                                ):
+                                    run_results["data_provider_type"] = (
+                                        self.data.selected_data_provider[
+                                            "data_provider_type"
+                                        ]
+                                    )
                                 if "duration" in self.data.selected_data_provider:
-                                    run_results["data_download_duration"] = self.data.selected_data_provider["duration"]
+                                    run_results["data_download_duration"] = (
+                                        self.data.selected_data_provider["duration"]
+                                    )
                                 if "size" in self.data.selected_data_provider:
-                                    run_results["data_size"] = self.data.selected_data_provider["size"]
+                                    run_results["data_size"] = (
+                                        self.data.selected_data_provider["size"]
+                                    )
                                 print(
                                     f"Data Provider Details: {run_results.get('dataname', '')}, "
                                     f"{run_results.get('data_provider_type', '')}, "
@@ -1280,7 +1358,9 @@ def run_container(
 
                         # Run the model
                         test_start_time = time.time()
-                        self.rich_console.print("[bold blue]Running model...[/bold blue]")
+                        self.rich_console.print(
+                            "[bold blue]Running model...[/bold blue]"
+                        )
 
                         model_args = self.context.ctx.get(
                             "model_args", model_info["args"]
@@ -1310,7 +1390,9 @@ def run_container(
 
                         # When model writes performance to a file in run_directory, copy to cwd
                         # so the host can read it (e.g. bind-mounted workspace) before extraction.
-                        multiple_results_file = (model_info.get("multiple_results") or "").strip()
+                        multiple_results_file = (
+                            model_info.get("multiple_results") or ""
+                        ).strip()
                         if multiple_results_file:
                             try:
                                 model_docker.sh(
@@ -1344,27 +1426,39 @@ def run_container(
                                     # Validate multiple results file format using proper CSV parsing
                                     try:
                                         import csv
+
                                         with open(resolved_path, "r") as f:
                                             csv_reader = csv.DictReader(f)
 
                                             # Strip whitespace from fieldnames to handle headers like "model, performance, metric"
-                                            csv_reader.fieldnames = [f.strip() for f in csv_reader.fieldnames]
+                                            csv_reader.fieldnames = [
+                                                f.strip() for f in csv_reader.fieldnames
+                                            ]
 
                                             # Check if 'performance' column exists
-                                            if 'performance' not in csv_reader.fieldnames:
-                                                print("Error: 'performance' column not found in multiple results file.")
+                                            if (
+                                                "performance"
+                                                not in csv_reader.fieldnames
+                                            ):
+                                                print(
+                                                    "Error: 'performance' column not found in multiple results file."
+                                                )
                                                 run_results["performance"] = None
                                             else:
                                                 # Check if at least one row has a non-empty performance value
                                                 has_valid_perf = False
                                                 for row in csv_reader:
-                                                    if row.get('performance', '').strip():
+                                                    if row.get(
+                                                        "performance", ""
+                                                    ).strip():
                                                         has_valid_perf = True
                                                         break
-                                                
+
                                                 if not has_valid_perf:
                                                     run_results["performance"] = None
-                                                    print("Error: Performance metric is empty in all rows of multiple results file.")
+                                                    print(
+                                                        "Error: Performance metric is empty in all rows of multiple results file."
+                                                    )
                                     except Exception as e:
                                         self.rich_console.print(
                                             f"[yellow]Warning: Could not validate multiple results file: {e}[/yellow]"
@@ -1377,46 +1471,73 @@ def run_container(
                                 # Extract from log file
                                 try:
                                     # Note: re and os are already imported at module level (lines 10, 15)
-                                    
+
                                     # Verify log file exists and is readable
                                     if not os.path.exists(log_file_path):
-                                        print(f"Warning: Log file not found: {log_file_path}")
+                                        print(
+                                            f"Warning: Log file not found: {log_file_path}"
+                                        )
                                         run_results["performance"] = None
                                         run_results["metric"] = None
                                     else:
                                         # Read the log file once (avoids rocprofv3 crash from shell pipelines)
                                         # This approach matches the Kubernetes implementation pattern
-                                        with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                                        with open(
+                                            log_file_path,
+                                            "r",
+                                            encoding="utf-8",
+                                            errors="ignore",
+                                        ) as f:
                                             log_content = f.read()
-                                        
+
                                         # Try multiple patterns to match different log formats
-                                        
+
                                         # Pattern 1: "performance: <value>[<unit>][,] <metric>"
                                         # See PERFORMANCE_LOG_PATTERN in deployment.base for accepted formats.
-                                        match = re.search(PERFORMANCE_LOG_PATTERN, log_content)
-                                        
+                                        match = re.search(
+                                            PERFORMANCE_LOG_PATTERN, log_content
+                                        )
+
                                         if match:
-                                            run_results["performance"] = match.group(1).strip()
-                                            run_results["metric"] = match.group(2).strip()
-                                            print(f"✓ Extracted performance: {run_results['performance']} {run_results['metric']}")
+                                            run_results["performance"] = match.group(
+                                                1
+                                            ).strip()
+                                            run_results["metric"] = match.group(
+                                                2
+                                            ).strip()
+                                            print(
+                                                f"✓ Extracted performance: {run_results['performance']} {run_results['metric']}"
+                                            )
                                         else:
                                             # Pattern 2: HuggingFace format - "'train_samples_per_second': 4.23" or "train_samples_per_second = 4.23"
                                             # This matches the actual output from HuggingFace Trainer
                                             hf_pattern = r'train_samples_per_second[\'"\s:=]+([0-9][0-9.eE+-]*)'
-                                            hf_match = re.search(hf_pattern, log_content)
-                                            
+                                            hf_match = re.search(
+                                                hf_pattern, log_content
+                                            )
+
                                             if hf_match:
-                                                run_results["performance"] = hf_match.group(1).strip()
-                                                run_results["metric"] = "samples_per_second"
-                                                print(f"✓ Extracted performance (HuggingFace format): {run_results['performance']} {run_results['metric']}")
+                                                run_results["performance"] = (
+                                                    hf_match.group(1).strip()
+                                                )
+                                                run_results["metric"] = (
+                                                    "samples_per_second"
+                                                )
+                                                print(
+                                                    f"✓ Extracted performance (HuggingFace format): {run_results['performance']} {run_results['metric']}"
+                                                )
                                             else:
                                                 # No performance metrics found
-                                                print("Warning: Performance metric not found in expected format 'performance: NUMBER METRIC' or 'train_samples_per_second'")
+                                                print(
+                                                    "Warning: Performance metric not found in expected format 'performance: NUMBER METRIC' or 'train_samples_per_second'"
+                                                )
                                                 run_results["performance"] = None
                                                 run_results["metric"] = None
-                                            
+
                                 except Exception as e:
-                                    print(f"Warning: Error extracting performance metrics: {e}")
+                                    print(
+                                        f"Warning: Error extracting performance metrics: {e}"
+                                    )
                                     run_results["performance"] = None
                                     run_results["metric"] = None
                                     # Performance extraction is optional - don't fail the entire run
@@ -1500,9 +1621,12 @@ def run_container(
                                 and performance_value.strip()
                                 and performance_value.strip() != "N/A"
                             )
-                            
+
                             # Check if this is a worker node (not collecting metrics)
-                            is_worker_node = os.environ.get("MAD_COLLECT_METRICS", "true").lower() == "false"
+                            is_worker_node = (
+                                os.environ.get("MAD_COLLECT_METRICS", "true").lower()
+                                == "false"
+                            )
 
                             if has_errors:
                                 run_results["status"] = "FAILURE"
@@ -1522,13 +1646,20 @@ def run_container(
                                 )
                             else:
                                 run_results["status"] = "FAILURE"
-                                self.rich_console.print(f"[red]Status: FAILURE (no performance metrics)[/red]")
+                                self.rich_console.print(
+                                    f"[red]Status: FAILURE (no performance metrics)[/red]"
+                                )
 
                         except Exception as e:
-                            self.rich_console.print(f"[yellow]Warning: Error in status determination: {e}[/yellow]")
+                            self.rich_console.print(
+                                f"[yellow]Warning: Error in status determination: {e}[/yellow]"
+                            )
                             # Fallback to simple performance check
                             # Worker nodes don't need performance metrics
-                            is_worker_node = os.environ.get("MAD_COLLECT_METRICS", "true").lower() == "false"
+                            is_worker_node = (
+                                os.environ.get("MAD_COLLECT_METRICS", "true").lower()
+                                == "false"
+                            )
                             run_results["status"] = (
                                 "SUCCESS"
                                 if run_results.get("performance") or is_worker_node
@@ -1544,8 +1675,10 @@ def run_container(
                         # =============================================================================
                         # For distributed training, only master node should collect metrics
                         # Check skip_perf_collection flag from additional_context
-                        skip_perf = self.additional_context.get("skip_perf_collection", False)
-                        
+                        skip_perf = self.additional_context.get(
+                            "skip_perf_collection", False
+                        )
+
                         if skip_perf:
                             self.rich_console.print(
                                 "[cyan]ℹ️  Worker node: Skipping performance metric collection "
@@ -1561,9 +1694,13 @@ def run_container(
                                 )
 
                                 # Handle multiple results if specified
-                                multiple_results = model_info.get("multiple_results", None)
+                                multiple_results = model_info.get(
+                                    "multiple_results", None
+                                )
                                 resolved_multiple_results = (
-                                    _resolve_multiple_results_path(multiple_results, model_dir)
+                                    _resolve_multiple_results_path(
+                                        multiple_results, model_dir
+                                    )
                                     if multiple_results
                                     else None
                                 )
@@ -1574,7 +1711,12 @@ def run_container(
                                     # Generate common info JSON for multiple results
                                     common_info = run_details_dict.copy()
                                     # Remove model-specific fields for common info
-                                    for key in ["model", "performance", "metric", "status"]:
+                                    for key in [
+                                        "model",
+                                        "performance",
+                                        "metric",
+                                        "status",
+                                    ]:
                                         common_info.pop(key, None)
 
                                     with open("common_info.json", "w") as f:
@@ -1594,8 +1736,10 @@ def run_container(
                                     # Update perf_super.json with multiple results
                                     try:
                                         scripts_path = model_info.get("scripts", "")
-                                        scripts_base_dir = scripts_base_dir_from(scripts_path)
-                                        
+                                        scripts_base_dir = scripts_base_dir_from(
+                                            scripts_path
+                                        )
+
                                         # Reuse common_info.json for super files (no need for duplicate)
                                         num_entries = update_perf_super_json(
                                             multiple_results=resolved_multiple_results,
@@ -1604,15 +1748,17 @@ def run_container(
                                             common_info="common_info.json",
                                             scripts_base_dir=scripts_base_dir,
                                         )
-                                        
+
                                         # Generate CSV and JSON files from perf_super.json
                                         update_perf_super_csv(
                                             perf_super_json="perf_super.json",
                                             perf_super_csv="perf_super.csv",
-                                            num_entries=num_entries
+                                            num_entries=num_entries,
                                         )
                                     except Exception as e:
-                                        print(f"⚠️  Warning: Could not update perf_super files: {e}")
+                                        print(
+                                            f"⚠️  Warning: Could not update perf_super files: {e}"
+                                        )
                                 else:
                                     # Generate single result JSON
                                     with open("perf_entry.json", "w") as f:
@@ -1636,8 +1782,10 @@ def run_container(
                                     # Update perf_super.json with single result
                                     try:
                                         scripts_path = model_info.get("scripts", "")
-                                        scripts_base_dir = scripts_base_dir_from(scripts_path)
-                                        
+                                        scripts_base_dir = scripts_base_dir_from(
+                                            scripts_path
+                                        )
+
                                         # Use perf_entry.json as input (already created above)
                                         if run_results.get("status") == "SUCCESS":
                                             num_entries = update_perf_super_json(
@@ -1651,18 +1799,22 @@ def run_container(
                                                 perf_super_json="perf_super.json",
                                                 scripts_base_dir=scripts_base_dir,
                                             )
-                                        
+
                                         # Generate CSV and JSON files from perf_super.json
                                         update_perf_super_csv(
                                             perf_super_json="perf_super.json",
                                             perf_super_csv="perf_super.csv",
-                                            num_entries=num_entries
+                                            num_entries=num_entries,
                                         )
                                     except Exception as e:
-                                        print(f"⚠️  Warning: Could not update perf_super files: {e}")
+                                        print(
+                                            f"⚠️  Warning: Could not update perf_super files: {e}"
+                                        )
 
                             except Exception as e:
-                                self.rich_console.print(f"[yellow]Warning: Could not update perf.csv: {e}[/yellow]")
+                                self.rich_console.print(
+                                    f"[yellow]Warning: Could not update perf.csv: {e}[/yellow]"
+                                )
 
                         # Copy profiler/trace output files from run_directory to base directory before cleanup
                         # This ensures test files like gpu_info_power_profiler_output.csv and library_trace.csv are accessible
@@ -1681,7 +1833,9 @@ def run_container(
                                 f"{_bash_quote_path('.')} 2>/dev/null || true"
                             )
                             model_docker.sh(
-                                _cp_model_dir_file_to_cwd_cmd(model_dir, "library_trace.csv")
+                                _cp_model_dir_file_to_cwd_cmd(
+                                    model_dir, "library_trace.csv"
+                                )
                             )
                         except Exception as e:
                             # Ignore errors if no profiler/trace output files exist
@@ -1745,25 +1899,27 @@ def run_container(
                 try:
                     scripts_path = model_info.get("scripts", "")
                     scripts_base_dir = scripts_base_dir_from(scripts_path)
-                    
+
                     # Use perf_entry.json as input (already created above)
                     num_entries = update_perf_super_json(
                         exception_result="perf_entry.json",
                         perf_super_json="perf_super.json",
                         scripts_base_dir=scripts_base_dir,
                     )
-                    
+
                     # Generate CSV and JSON files from perf_super.json
                     update_perf_super_csv(
                         perf_super_json="perf_super.json",
                         perf_super_csv="perf_super.csv",
-                        num_entries=num_entries
+                        num_entries=num_entries,
                     )
                 except Exception as e:
                     print(f"⚠️  Warning: Could not update perf_super files: {e}")
 
             except Exception as csv_e:
-                self.rich_console.print(f"[yellow]Warning: Could not update perf.csv with exception: {csv_e}[/yellow]")
+                self.rich_console.print(
+                    f"[yellow]Warning: Could not update perf.csv with exception: {csv_e}[/yellow]"
+                )
 
         return run_results
 
@@ -1799,67 +1955,94 @@ def run_models_from_manifest(
         Returns:
             dict: Execution summary with successful and failed runs
         """
-        self.rich_console.print(f"[bold blue]📦 Loading manifest:[/bold blue] {manifest_file}")
-        
+        self.rich_console.print(
+            f"[bold blue]📦 Loading manifest:[/bold blue] {manifest_file}"
+        )
+
         # Load manifest
         manifest = self.load_build_manifest(manifest_file)
         built_images = manifest.get("built_images", {})
         built_models = manifest.get("built_models", {})
-        
+
         # Load deployment_config from manifest for GPU resolution
         if "deployment_config" in manifest and not self.additional_context:
-            self.additional_context = {"deployment_config": manifest["deployment_config"]}
+            self.additional_context = {
+                "deployment_config": manifest["deployment_config"]
+            }
         # Merge manifest context (e.g. skip_perf_collection for multi-node SLURM aggregation)
         if "context" in manifest and isinstance(manifest["context"], dict):
-            self.additional_context = {**(self.additional_context or {}), **manifest["context"]}
+            self.additional_context = {
+                **(self.additional_context or {}),
+                **manifest["context"],
+            }
 
         if not built_images:
             self.rich_console.print("[yellow]⚠️  No images found in manifest[/yellow]")
             return {"successful_runs": [], "failed_runs": []}
-        
-        self.rich_console.print(f"[green]Found {len(built_images)} image(s) to run[/green]\n")
-        
+
+        self.rich_console.print(
+            f"[green]Found {len(built_images)} image(s) to run[/green]\n"
+        )
+
         # Login to registry if needed
         if registry or any(img.get("registry") for img in built_images.values()):
             effective_registry = registry or next(
-                (img.get("registry") for img in built_images.values() if img.get("registry")), 
-                None
+                (
+                    img.get("registry")
+                    for img in built_images.values()
+                    if img.get("registry")
+                ),
+                None,
             )
             if effective_registry:
                 try:
                     self.login_to_registry(effective_registry, self.credentials)
                 except Exception as e:
-                    self.rich_console.print(f"[yellow]Warning: Registry login failed: {e}[/yellow]")
-                    self.rich_console.print("[yellow]Proceeding with local images only[/yellow]\n")
-        
+                    self.rich_console.print(
+                        f"[yellow]Warning: Registry login failed: {e}[/yellow]"
+                    )
+                    self.rich_console.print(
+                        "[yellow]Proceeding with local images only[/yellow]\n"
+                    )
+
         # Track results
         successful_runs = []
         failed_runs = []
-        
+
         # Run each model
         for image_name, build_info in built_images.items():
             model_info = built_models.get(image_name, {})
             if not model_info:
-                self.rich_console.print(f"[yellow]⚠️  No model info for {image_name}, skipping[/yellow]")
+                self.rich_console.print(
+                    f"[yellow]⚠️  No model info for {image_name}, skipping[/yellow]"
+                )
                 continue
-            
+
             try:
                 # Handle different image sources
                 if build_info.get("local_image"):
                     # Local image mode (MAD_CONTAINER_IMAGE): Use the provided image directly
                     run_image = build_info.get("docker_image")
-                    self.rich_console.print(f"[yellow]🏠 Using local image: {run_image}[/yellow]")
-                    
+                    self.rich_console.print(
+                        f"[yellow]🏠 Using local image: {run_image}[/yellow]"
+                    )
+
                     # Verify image exists
                     try:
-                        self.console.sh(f"docker image inspect {run_image} > /dev/null 2>&1")
+                        self.console.sh(
+                            f"docker image inspect {run_image} > /dev/null 2>&1"
+                        )
                     except (subprocess.CalledProcessError, RuntimeError) as e:
-                        self.rich_console.print(f"[yellow]⚠️  Image {run_image} not found, attempting to pull...[/yellow]")
+                        self.rich_console.print(
+                            f"[yellow]⚠️  Image {run_image} not found, attempting to pull...[/yellow]"
+                        )
                         try:
                             self.pull_image(run_image)
                         except Exception as e:
-                            raise RuntimeError(f"Failed to find or pull local image {run_image}: {e}")
-                
+                            raise RuntimeError(
+                                f"Failed to find or pull local image {run_image}: {e}"
+                            )
+
                 elif build_info.get("registry_image"):
                     # Registry image: Pull from registry
                     try:
@@ -1867,12 +2050,14 @@ def run_models_from_manifest(
                         # Update docker_image to use registry image
                         run_image = build_info["registry_image"]
                     except Exception as pull_error:
-                        self.rich_console.print(f"[yellow]Warning: Could not pull from registry, using local image[/yellow]")
+                        self.rich_console.print(
+                            f"[yellow]Warning: Could not pull from registry, using local image[/yellow]"
+                        )
                         run_image = image_name
                 else:
                     # Normal built image: Use the image name directly
                     run_image = image_name
-                
+
                 # Run the container
                 run_results = self.run_container(
                     model_info=model_info,
@@ -1883,38 +2068,49 @@ def run_models_from_manifest(
                     timeout=timeout,
                     phase_suffix=phase_suffix,
                 )
-                
+
                 # Check actual status and track accordingly
                 status = run_results.get("status", "SUCCESS")
                 if status == "SUCCESS":
-                    successful_runs.append({
-                        "model": model_info["name"],
-                        "image": run_image,
-                        "status": status,
-                        "performance": run_results.get("performance"),
-                        "duration": run_results.get("test_duration"),
-                    })
+                    successful_runs.append(
+                        {
+                            "model": model_info["name"],
+                            "image": run_image,
+                            "status": status,
+                            "performance": run_results.get("performance"),
+                            "duration": run_results.get("test_duration"),
+                        }
+                    )
                 else:
                     # Status is FAILURE - track as failed
-                    failed_runs.append({
-                        "model": model_info["name"],
-                        "image": run_image,
-                        "status": status,
-                        "error": "Container execution failed - check logs for details",
-                    })
-                    self.rich_console.print(f"[red]❌ Run failed for {model_info['name']}: Status={status}[/red]")
-                
+                    failed_runs.append(
+                        {
+                            "model": model_info["name"],
+                            "image": run_image,
+                            "status": status,
+                            "error": "Container execution failed - check logs for details",
+                        }
+                    )
+                    self.rich_console.print(
+                        f"[red]❌ Run failed for {model_info['name']}: Status={status}[/red]"
+                    )
+
             except Exception as e:
-                self.rich_console.print(f"[red]❌ Failed to run {model_info['name']}: {e}[/red]")
+                self.rich_console.print(
+                    f"[red]❌ Failed to run {model_info['name']}: {e}[/red]"
+                )
                 error_msg = str(e)
-                failed_runs.append({
-                    "model": model_info.get("name", image_name),
-                    "image": image_name,
-                    "error": error_msg,
-                })
+                failed_runs.append(
+                    {
+                        "model": model_info.get("name", image_name),
+                        "image": image_name,
+                        "error": error_msg,
+                    }
+                )
                 # Record failure in performance table so status is consistent and table is complete
                 try:
                     import tempfile
+
                     self.ensure_perf_csv_exists()
                     perf_entry = self._create_setup_failure_perf_entry(
                         model_info=model_info,
@@ -1941,12 +2137,14 @@ def run_models_from_manifest(
                     self.rich_console.print(
                         f"[yellow]Warning: Could not record setup failure to perf CSV: {csv_e}[/yellow]"
                     )
-        
+
         # Summary
         self.rich_console.print(f"\n[bold]📊 Execution Summary:[/bold]")
-        self.rich_console.print(f"  [green]✓ Successful:[/green] {len(successful_runs)}")
+        self.rich_console.print(
+            f"  [green]✓ Successful:[/green] {len(successful_runs)}"
+        )
         self.rich_console.print(f"  [red]✗ Failed:[/red] {len(failed_runs)}")
-        
+
         return {
             "successful_runs": successful_runs,
             "failed_runs": failed_runs,
diff --git a/src/madengine/execution/container_runner_helpers.py b/src/madengine/execution/container_runner_helpers.py
index dfa99be5..63325e7a 100644
--- a/src/madengine/execution/container_runner_helpers.py
+++ b/src/madengine/execution/container_runner_helpers.py
@@ -205,19 +205,13 @@ def _docker_image_ref_for_log_naming(docker_image: str) -> str:
     ref_without_digest = s.split("@", 1)[0]
     last_slash = ref_without_digest.rfind("/")
     tail = (
-        ref_without_digest[last_slash + 1 :]
-        if last_slash >= 0
-        else ref_without_digest
+        ref_without_digest[last_slash + 1 :] if last_slash >= 0 else ref_without_digest
     )
     if ":" in tail:
         _, tag = tail.split(":", 1)
         if tag.startswith("ci-"):
             return tag
-    return (
-        ref_without_digest.replace("/", "_")
-        .replace(":", "_")
-        .replace("@", "_")
-    )
+    return ref_without_digest.replace("/", "_").replace(":", "_").replace("@", "_")
 
 
 def make_run_log_file_path(
diff --git a/src/madengine/execution/docker_builder.py b/src/madengine/execution/docker_builder.py
index 56f33d6d..b7130def 100644
--- a/src/madengine/execution/docker_builder.py
+++ b/src/madengine/execution/docker_builder.py
@@ -7,22 +7,24 @@
 and then distributed to remote nodes for execution.
 """
 
+import json
 import os
+import re
 import shlex
 import time
-import json
-import re
 import typing
-from contextlib import redirect_stdout, redirect_stderr
+from contextlib import redirect_stderr, redirect_stdout
+
 from rich.console import Console as RichConsole
+
 from madengine.core.auth import login_to_registry
 from madengine.core.console import Console
 from madengine.core.context import Context
-from madengine.utils.ops import PythonicTee
 from madengine.execution.dockerfile_utils import (
     is_target_arch_compatible_with_variable,
     parse_dockerfile_gpu_variables,
 )
+from madengine.utils.ops import PythonicTee
 
 
 class DockerBuilder:
@@ -93,7 +95,13 @@ def get_build_arg(self, run_build_arg: typing.Optional[typing.Dict] = None) -> s
 
         if run_build_arg:
             for key, value in run_build_arg.items():
-                build_args += "--build-arg " + shlex.quote(str(key)) + "=" + shlex.quote(str(value)) + " "
+                build_args += (
+                    "--build-arg "
+                    + shlex.quote(str(key))
+                    + "="
+                    + shlex.quote(str(value))
+                    + " "
+                )
 
         return build_args
 
@@ -146,7 +154,9 @@ def build_image(
         # Replace / with _ in log file path (already done above, but keeping for safety)
         log_file_path = log_file_path.replace("/", "_")
 
-        self.rich_console.print(f"\n[bold green]🔨 Starting Docker build for model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan]")
+        self.rich_console.print(
+            f"\n[bold green]🔨 Starting Docker build for model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan]"
+        )
         print(f"📁 Dockerfile: {dockerfile}")
         print(f"🏷️  Target image: {docker_image}")
         print(f"📝 Build log: {log_file_path}")
@@ -195,7 +205,9 @@ def build_image(
 
                 print(f"⏱️  Build Duration: {build_duration:.2f} seconds")
                 print(f"🏷️  MAD_CONTAINER_IMAGE is {docker_image}")
-                self.rich_console.print(f"[bold green]✅ Docker build completed successfully[/bold green]")
+                self.rich_console.print(
+                    f"[bold green]✅ Docker build completed successfully[/bold green]"
+                )
                 self.rich_console.print(f"[dim]{'='*80}[/dim]")
 
                 # Get base docker info
@@ -220,11 +232,13 @@ def build_image(
                     )
                     print(f"BASE DOCKER SHA is {docker_sha}")
                 except Exception as e:
-                    self.rich_console.print(f"[yellow]Warning: Could not get docker SHA: {e}[/yellow]")
+                    self.rich_console.print(
+                        f"[yellow]Warning: Could not get docker SHA: {e}[/yellow]"
+                    )
 
         # Infer GPU vendor from dockerfile path
         gpu_vendor = self._infer_gpu_vendor_from_dockerfile(dockerfile)
-        
+
         build_info = {
             "model": model_info["name"],
             "docker_image": docker_image,
@@ -243,7 +257,9 @@ def build_image(
         # Store model info linked to the built image
         self.built_models[docker_image] = model_info
 
-        self.rich_console.print(f"[bold green]Successfully built image:[/bold green] [cyan]{docker_image}[/cyan]")
+        self.rich_console.print(
+            f"[bold green]Successfully built image:[/bold green] [cyan]{docker_image}[/cyan]"
+        )
 
         return build_info
 
@@ -306,17 +322,23 @@ def push_image(
 
             # Push the image
             push_command = f"docker push {registry_image}"
-            self.rich_console.print(f"\n[bold blue]🚀 Starting docker push to registry...[/bold blue]")
+            self.rich_console.print(
+                f"\n[bold blue]🚀 Starting docker push to registry...[/bold blue]"
+            )
             print(f"📤 Registry: {registry}")
             print(f"🏷️  Image: {registry_image}")
             self.console.sh(push_command)
 
-            self.rich_console.print(f"[bold green]✅ Successfully pushed image to registry:[/bold green] [cyan]{registry_image}[/cyan]")
+            self.rich_console.print(
+                f"[bold green]✅ Successfully pushed image to registry:[/bold green] [cyan]{registry_image}[/cyan]"
+            )
             self.rich_console.print(f"[dim]{'='*80}[/dim]")
             return registry_image
 
         except Exception as e:
-            self.rich_console.print(f"[red]❌ Failed to push image {docker_image} to registry {registry}: {e}[/red]")
+            self.rich_console.print(
+                f"[red]❌ Failed to push image {docker_image} to registry {registry}: {e}[/red]"
+            )
             raise
 
     def export_build_manifest(
@@ -379,18 +401,20 @@ def export_build_manifest(
             },
             "credentials_required": credentials_required,
         }
-        
+
         # Preserve tools configuration if present in context
         if "tools" in self.context.ctx:
             manifest["context"]["tools"] = self.context.ctx["tools"]
-        
+
         # Preserve pre/post scripts if present in context
         if "pre_scripts" in self.context.ctx:
             manifest["context"]["pre_scripts"] = self.context.ctx["pre_scripts"]
         if "post_scripts" in self.context.ctx:
             manifest["context"]["post_scripts"] = self.context.ctx["post_scripts"]
         if "encapsulate_script" in self.context.ctx:
-            manifest["context"]["encapsulate_script"] = self.context.ctx["encapsulate_script"]
+            manifest["context"]["encapsulate_script"] = self.context.ctx[
+                "encapsulate_script"
+            ]
 
         # Add push failure summary if any pushes failed
         push_failures = []
@@ -410,9 +434,13 @@ def export_build_manifest(
         with open(output_file, "w") as f:
             json.dump(manifest, f, indent=2)
 
-        self.rich_console.print(f"[green]Build manifest exported to:[/green] {output_file}")
+        self.rich_console.print(
+            f"[green]Build manifest exported to:[/green] {output_file}"
+        )
         if push_failures:
-            self.rich_console.print(f"[yellow]Warning: {len(push_failures)} image(s) failed to push to registry[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: {len(push_failures)} image(s) failed to push to registry[/yellow]"
+            )
             for failure in push_failures:
                 self.rich_console.print(
                     f"[red]  - {failure['image']} -> {failure['intended_registry_image']}: {failure['error']}[/red]"
@@ -442,12 +470,18 @@ def build_all_models(
         Returns:
             dict: Summary of all built images
         """
-        self.rich_console.print(f"[bold blue]Building Docker images for {len(models)} models...[/bold blue]")
-        
+        self.rich_console.print(
+            f"[bold blue]Building Docker images for {len(models)} models...[/bold blue]"
+        )
+
         if target_archs:
-            self.rich_console.print(f"[bold cyan]Multi-architecture build mode enabled for: {', '.join(target_archs)}[/bold cyan]")
+            self.rich_console.print(
+                f"[bold cyan]Multi-architecture build mode enabled for: {', '.join(target_archs)}[/bold cyan]"
+            )
         else:
-            self.rich_console.print(f"[bold cyan]Single architecture build mode[/bold cyan]")
+            self.rich_console.print(
+                f"[bold cyan]Single architecture build mode[/bold cyan]"
+            )
 
         build_summary = {
             "successful_builds": [],
@@ -456,69 +490,89 @@ def build_all_models(
             "successful_pushes": [],
             "failed_pushes": [],
         }
-        
+
         for model_info in models:
             # Check if MAD_SYSTEM_GPU_ARCHITECTURE is provided in additional_context
             # This overrides --target-archs and uses default flow
-            if ("docker_build_arg" in self.context.ctx and 
-                "MAD_SYSTEM_GPU_ARCHITECTURE" in self.context.ctx["docker_build_arg"]):
-                self.rich_console.print(f"[yellow]Info: MAD_SYSTEM_GPU_ARCHITECTURE provided in additional_context, "
-                      f"disabling --target-archs and using default flow for model {model_info['name']}[/yellow]")
+            if (
+                "docker_build_arg" in self.context.ctx
+                and "MAD_SYSTEM_GPU_ARCHITECTURE"
+                in self.context.ctx["docker_build_arg"]
+            ):
+                self.rich_console.print(
+                    f"[yellow]Info: MAD_SYSTEM_GPU_ARCHITECTURE provided in additional_context, "
+                    f"disabling --target-archs and using default flow for model {model_info['name']}[/yellow]"
+                )
                 # Use single architecture build mode regardless of target_archs
                 try:
                     single_build_info = self._build_model_single_arch(
-                        model_info, credentials, clean_cache, 
-                        registry, phase_suffix, batch_build_metadata
+                        model_info,
+                        credentials,
+                        clean_cache,
+                        registry,
+                        phase_suffix,
+                        batch_build_metadata,
                     )
                     build_summary["successful_builds"].extend(single_build_info)
                     build_summary["total_build_time"] += sum(
                         info.get("build_duration", 0) for info in single_build_info
                     )
                 except Exception as e:
-                    build_summary["failed_builds"].append({
-                        "model": model_info["name"],
-                        "error": str(e)
-                    })
+                    build_summary["failed_builds"].append(
+                        {"model": model_info["name"], "error": str(e)}
+                    )
             elif target_archs:
                 # Multi-architecture build mode - always use architecture suffix
                 for arch in target_archs:
                     try:
                         # Always build with architecture suffix when --target-archs is used
                         arch_build_info = self._build_model_for_arch(
-                            model_info, arch, credentials, clean_cache, 
-                            registry, phase_suffix, batch_build_metadata
+                            model_info,
+                            arch,
+                            credentials,
+                            clean_cache,
+                            registry,
+                            phase_suffix,
+                            batch_build_metadata,
                         )
-                        
+
                         build_summary["successful_builds"].extend(arch_build_info)
                         build_summary["total_build_time"] += sum(
                             info.get("build_duration", 0) for info in arch_build_info
                         )
                     except Exception as e:
-                        build_summary["failed_builds"].append({
-                            "model": model_info["name"],
-                            "architecture": arch,
-                            "error": str(e)
-                        })
+                        build_summary["failed_builds"].append(
+                            {
+                                "model": model_info["name"],
+                                "architecture": arch,
+                                "error": str(e),
+                            }
+                        )
             else:
                 # Single architecture build mode (existing behavior - no validation needed)
                 try:
                     single_build_info = self._build_model_single_arch(
-                        model_info, credentials, clean_cache, 
-                        registry, phase_suffix, batch_build_metadata
+                        model_info,
+                        credentials,
+                        clean_cache,
+                        registry,
+                        phase_suffix,
+                        batch_build_metadata,
                     )
                     build_summary["successful_builds"].extend(single_build_info)
                     build_summary["total_build_time"] += sum(
                         info.get("build_duration", 0) for info in single_build_info
                     )
                 except Exception as e:
-                    build_summary["failed_builds"].append({
-                        "model": model_info["name"],
-                        "error": str(e)
-                    })
-        
+                    build_summary["failed_builds"].append(
+                        {"model": model_info["name"], "error": str(e)}
+                    )
+
         return build_summary
 
-    def _check_dockerfile_has_gpu_variables(self, model_info: typing.Dict) -> typing.Tuple[bool, str]:
+    def _check_dockerfile_has_gpu_variables(
+        self, model_info: typing.Dict
+    ) -> typing.Tuple[bool, str]:
         """
         Check if model's Dockerfile contains GPU architecture variables.
         Returns (has_gpu_vars, dockerfile_path)
@@ -526,24 +580,26 @@ def _check_dockerfile_has_gpu_variables(self, model_info: typing.Dict) -> typing
         try:
             # Find dockerfiles for this model
             dockerfiles = self._get_dockerfiles_for_model(model_info)
-            
+
             for dockerfile_path in dockerfiles:
-                with open(dockerfile_path, 'r') as f:
+                with open(dockerfile_path, "r") as f:
                     dockerfile_content = f.read()
-                
+
                 # Parse GPU architecture variables from Dockerfile
                 dockerfile_gpu_vars = parse_dockerfile_gpu_variables(dockerfile_content)
-                
+
                 if dockerfile_gpu_vars:
                     return True, dockerfile_path
                 else:
                     return False, dockerfile_path
-            
+
             # No dockerfiles found
             return False, "No Dockerfile found"
-            
+
         except Exception as e:
-            self.rich_console.print(f"[yellow]Warning: Error checking GPU variables for model {model_info['name']}: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: Error checking GPU variables for model {model_info['name']}: {e}[/yellow]"
+            )
             return False, "Error reading Dockerfile"
 
     def _get_dockerfiles_for_model(self, model_info: typing.Dict) -> typing.List[str]:
@@ -551,9 +607,7 @@ def _get_dockerfiles_for_model(self, model_info: typing.Dict) -> typing.List[str
         try:
             # Quote the dockerfile path to prevent shell injection
             dockerfile_quoted = shlex.quote(model_info["dockerfile"])
-            all_dockerfiles = self.console.sh(
-                f"ls {dockerfile_quoted}.*"
-            ).split("\n")
+            all_dockerfiles = self.console.sh(f"ls {dockerfile_quoted}.*").split("\n")
 
             dockerfiles = {}
             for cur_docker_file in all_dockerfiles:
@@ -564,14 +618,18 @@ def _get_dockerfiles_for_model(self, model_info: typing.Dict) -> typing.List[str
 
             # Filter dockerfiles based on context
             dockerfiles = self.context.filter(dockerfiles)
-            
+
             return list(dockerfiles.keys())
-            
+
         except Exception as e:
-            self.rich_console.print(f"[yellow]Warning: Error finding dockerfiles for model {model_info['name']}: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: Error finding dockerfiles for model {model_info['name']}: {e}[/yellow]"
+            )
             return []
 
-    def _validate_target_arch_against_dockerfile(self, model_info: typing.Dict, target_arch: str) -> bool:
+    def _validate_target_arch_against_dockerfile(
+        self, model_info: typing.Dict, target_arch: str
+    ) -> bool:
         """
         Validate that target architecture is compatible with model's Dockerfile GPU variables.
         Called during build phase when --target-archs is provided.
@@ -579,71 +637,77 @@ def _validate_target_arch_against_dockerfile(self, model_info: typing.Dict, targ
         try:
             # Find dockerfiles for this model
             dockerfiles = self._get_dockerfiles_for_model(model_info)
-            
+
             for dockerfile_path in dockerfiles:
-                with open(dockerfile_path, 'r') as f:
+                with open(dockerfile_path, "r") as f:
                     dockerfile_content = f.read()
-                
+
                 # Parse GPU architecture variables from Dockerfile
                 dockerfile_gpu_vars = parse_dockerfile_gpu_variables(dockerfile_content)
-                
+
                 if not dockerfile_gpu_vars:
                     # No GPU variables found - target arch is acceptable
-                    self.rich_console.print(f"[cyan]Info: No GPU architecture variables found in {dockerfile_path}, "
-                          f"target architecture '{target_arch}' is acceptable[/cyan]")
+                    self.rich_console.print(
+                        f"[cyan]Info: No GPU architecture variables found in {dockerfile_path}, "
+                        f"target architecture '{target_arch}' is acceptable[/cyan]"
+                    )
                     continue
-                
+
                 # Validate target architecture against each GPU variable
                 for var_name, var_values in dockerfile_gpu_vars.items():
                     if not is_target_arch_compatible_with_variable(
                         var_name, var_values, target_arch
                     ):
-                        self.rich_console.print(f"[red]Error: Target architecture '{target_arch}' is not compatible "
-                              f"with {var_name}={var_values} in {dockerfile_path}[/red]")
+                        self.rich_console.print(
+                            f"[red]Error: Target architecture '{target_arch}' is not compatible "
+                            f"with {var_name}={var_values} in {dockerfile_path}[/red]"
+                        )
                         return False
-                
-                self.rich_console.print(f"[cyan]Info: Target architecture '{target_arch}' validated successfully "
-                      f"against {dockerfile_path}[/cyan]")
-            
+
+                self.rich_console.print(
+                    f"[cyan]Info: Target architecture '{target_arch}' validated successfully "
+                    f"against {dockerfile_path}[/cyan]"
+                )
+
             return True
-            
+
         except FileNotFoundError as e:
-            self.rich_console.print(f"[yellow]Warning: Dockerfile not found for model {model_info['name']}: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: Dockerfile not found for model {model_info['name']}: {e}[/yellow]"
+            )
             return True  # Assume compatible if Dockerfile not found
         except Exception as e:
-            self.rich_console.print(f"[yellow]Warning: Error validating target architecture for model {model_info['name']}: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: Error validating target architecture for model {model_info['name']}: {e}[/yellow]"
+            )
             return True  # Assume compatible on parsing errors
 
     def _build_model_single_arch(
-        self, 
+        self,
         model_info: typing.Dict,
         credentials: typing.Dict,
         clean_cache: bool,
         registry: str,
         phase_suffix: str,
-        batch_build_metadata: typing.Optional[dict]
+        batch_build_metadata: typing.Optional[dict],
     ) -> typing.List[typing.Dict]:
         """Build model using existing single architecture flow."""
-        
+
         # Use existing build logic - MAD_SYSTEM_GPU_ARCHITECTURE comes from additional_context
         # or Dockerfile defaults
         dockerfiles = self._get_dockerfiles_for_model(model_info)
-        
+
         results = []
         for dockerfile in dockerfiles:
             build_info = self.build_image(
-                model_info, 
-                dockerfile, 
-                credentials,
-                clean_cache, 
-                phase_suffix
+                model_info, dockerfile, credentials, clean_cache, phase_suffix
             )
-            
+
             # Extract GPU architecture from build args or context for manifest
             gpu_arch = self._get_effective_gpu_architecture(model_info, dockerfile)
             if gpu_arch:
                 build_info["gpu_architecture"] = gpu_arch
-            
+
             # Handle registry push (existing logic)
             if registry:
                 try:
@@ -654,73 +718,82 @@ def _build_model_single_arch(
                         model_info,
                         credentials,
                     )
-                    self.push_image(build_info["docker_image"], registry, credentials, registry_image)
+                    self.push_image(
+                        build_info["docker_image"],
+                        registry,
+                        credentials,
+                        registry_image,
+                    )
                     build_info["registry_image"] = registry_image
                 except Exception as e:
                     build_info["push_error"] = str(e)
-            
+
             results.append(build_info)
-        
+
         return results
 
-    def _get_effective_gpu_architecture(self, model_info: typing.Dict, dockerfile_path: str) -> str:
+    def _get_effective_gpu_architecture(
+        self, model_info: typing.Dict, dockerfile_path: str
+    ) -> str:
         """Get effective GPU architecture for single arch builds."""
         # Check if MAD_SYSTEM_GPU_ARCHITECTURE is in build args from additional_context
-        if ("docker_build_arg" in self.context.ctx and 
-            "MAD_SYSTEM_GPU_ARCHITECTURE" in self.context.ctx["docker_build_arg"]):
+        if (
+            "docker_build_arg" in self.context.ctx
+            and "MAD_SYSTEM_GPU_ARCHITECTURE" in self.context.ctx["docker_build_arg"]
+        ):
             return self.context.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"]
-        
+
         # Try to extract from Dockerfile defaults
         try:
-            with open(dockerfile_path, 'r') as f:
+            with open(dockerfile_path, "r") as f:
                 content = f.read()
-            
+
             # Look for ARG or ENV declarations
             patterns = [
                 r"ARG\s+MAD_SYSTEM_GPU_ARCHITECTURE=([^\s\n]+)",
-                r"ENV\s+MAD_SYSTEM_GPU_ARCHITECTURE=([^\s\n]+)"
+                r"ENV\s+MAD_SYSTEM_GPU_ARCHITECTURE=([^\s\n]+)",
             ]
-            
+
             for pattern in patterns:
                 match = re.search(pattern, content, re.IGNORECASE)
                 if match:
-                    return match.group(1).strip('"\'')
+                    return match.group(1).strip("\"'")
         except Exception:
             pass
-        
+
         return None
 
     def _infer_gpu_vendor_from_dockerfile(self, dockerfile: str) -> str:
         """Infer GPU vendor from dockerfile path.
-        
+
         Args:
             dockerfile: Path to dockerfile (e.g., "docker/dummy.ubuntu.amd.Dockerfile")
-            
+
         Returns:
             GPU vendor string: "AMD", "NVIDIA", or ""
         """
         dockerfile_lower = dockerfile.lower()
-        
+
         # Check for explicit vendor indicators in filename
-        if '.amd.' in dockerfile_lower or dockerfile_lower.endswith('.amd'):
+        if ".amd." in dockerfile_lower or dockerfile_lower.endswith(".amd"):
             return "AMD"
-        elif '.nvidia.' in dockerfile_lower or dockerfile_lower.endswith('.nvidia'):
+        elif ".nvidia." in dockerfile_lower or dockerfile_lower.endswith(".nvidia"):
             return "NVIDIA"
-        
+
         # Try to infer from base image in Dockerfile
         try:
-            with open(dockerfile, 'r') as f:
+            with open(dockerfile, "r") as f:
                 content = f.read()
-            
+
             # Look for base image indicators
-            if 'FROM' in content:
-                if 'rocm' in content.lower() or 'amd' in content.lower():
+            if "FROM" in content:
+                if "rocm" in content.lower() or "amd" in content.lower():
                     return "AMD"
-                elif 'nvidia' in content.lower() or 'cuda' in content.lower():
+                elif "nvidia" in content.lower() or "cuda" in content.lower():
                     return "NVIDIA"
         except Exception:
             pass
-        
+
         # Default to empty (legacy - will be treated as compatible with all)
         return ""
 
@@ -728,13 +801,15 @@ def _create_base_image_name(self, model_info: typing.Dict, dockerfile: str) -> s
         """Create base image name from model info and dockerfile."""
         # Extract dockerfile context suffix (e.g., "ubuntu.amd" from "dummy.ubuntu.amd.Dockerfile")
         dockerfile_name = os.path.basename(dockerfile)
-        if '.' in dockerfile_name:
+        if "." in dockerfile_name:
             # Remove the .Dockerfile extension and get context
-            context_parts = dockerfile_name.replace('.Dockerfile', '').split('.')[1:]  # Skip model name
-            context_suffix = '.'.join(context_parts) if context_parts else 'default'
+            context_parts = dockerfile_name.replace(".Dockerfile", "").split(".")[
+                1:
+            ]  # Skip model name
+            context_suffix = ".".join(context_parts) if context_parts else "default"
         else:
-            context_suffix = 'default'
-        
+            context_suffix = "default"
+
         # Create base image name: ci-{model}_{model}.{context}
         return f"ci-{model_info['name']}_{model_info['name']}.{context_suffix}"
 
@@ -822,55 +897,57 @@ def _determine_registry_image_name(
         return registry_image
 
     def _build_model_for_arch(
-        self, 
+        self,
         model_info: typing.Dict,
         gpu_arch: str,
         credentials: typing.Dict,
         clean_cache: bool,
         registry: str,
         phase_suffix: str,
-        batch_build_metadata: typing.Optional[dict]
+        batch_build_metadata: typing.Optional[dict],
     ) -> typing.List[typing.Dict]:
         """Build model for specific GPU architecture with smart image naming."""
-        
+
         # Find dockerfiles
         dockerfiles = self._get_dockerfiles_for_model(model_info)
-        
+
         arch_results = []
         for dockerfile in dockerfiles:
             # When using --target-archs, always add architecture suffix regardless of GPU variables
             # This ensures consistent naming for multi-architecture builds
             base_image_name = self._create_base_image_name(model_info, dockerfile)
             arch_image_name = f"{base_image_name}_{gpu_arch}"
-            
+
             # Set MAD_SYSTEM_GPU_ARCHITECTURE for this build
             arch_build_args = {"MAD_SYSTEM_GPU_ARCHITECTURE": gpu_arch}
-            
+
             # Build the image
             build_info = self.build_image(
-                model_info, 
-                dockerfile, 
+                model_info,
+                dockerfile,
                 credentials,
-                clean_cache, 
+                clean_cache,
                 phase_suffix,
                 additional_build_args=arch_build_args,
-                override_image_name=arch_image_name
+                override_image_name=arch_image_name,
             )
-            
+
             # Add architecture metadata
             build_info["gpu_architecture"] = gpu_arch
-            
+
             # Handle registry push with architecture-specific tagging
             if registry:
                 registry_image = self._determine_registry_image_name(
                     arch_image_name, registry, credentials
                 )
                 try:
-                    self.push_image(arch_image_name, registry, credentials, registry_image)
+                    self.push_image(
+                        arch_image_name, registry, credentials, registry_image
+                    )
                     build_info["registry_image"] = registry_image
                 except Exception as e:
                     build_info["push_error"] = str(e)
-            
+
             arch_results.append(build_info)
-        
+
         return arch_results
diff --git a/src/madengine/execution/dockerfile_utils.py b/src/madengine/execution/dockerfile_utils.py
index c8392b5a..ee663d8f 100644
--- a/src/madengine/execution/dockerfile_utils.py
+++ b/src/madengine/execution/dockerfile_utils.py
@@ -32,7 +32,7 @@ def parse_dockerfile_gpu_variables(
 
         all_matches = arg_matches + env_matches
         if all_matches:
-            raw_value = all_matches[-1].strip('"\'')
+            raw_value = all_matches[-1].strip("\"'")
             parsed_values = parse_gpu_variable_value(var_name, raw_value)
             if parsed_values:
                 gpu_variables[var_name] = parsed_values
diff --git a/src/madengine/orchestration/__init__.py b/src/madengine/orchestration/__init__.py
index e3dce29a..93c53110 100644
--- a/src/madengine/orchestration/__init__.py
+++ b/src/madengine/orchestration/__init__.py
@@ -13,4 +13,3 @@
 from .run_orchestrator import RunOrchestrator
 
 __all__ = ["BuildOrchestrator", "RunOrchestrator"]
-
diff --git a/src/madengine/orchestration/build_orchestrator.py b/src/madengine/orchestration/build_orchestrator.py
index d905f3b4..d78e08e8 100644
--- a/src/madengine/orchestration/build_orchestrator.py
+++ b/src/madengine/orchestration/build_orchestrator.py
@@ -16,21 +16,21 @@
 from rich.console import Console as RichConsole
 from rich.panel import Panel
 
-from madengine.core.console import Console
-from madengine.core.context import Context
 from madengine.core.additional_context_defaults import apply_build_context_defaults
 from madengine.core.auth import load_credentials
+from madengine.core.console import Console
+from madengine.core.context import Context
 from madengine.core.errors import (
     BuildError,
     ConfigurationError,
     DiscoveryError,
     create_error_context,
 )
-from madengine.utils.discover_models import DiscoverModels
 from madengine.execution.docker_builder import DockerBuilder
 from madengine.execution.dockerfile_utils import (
     dockerfile_requires_explicit_mad_arch_build_arg,
 )
+from madengine.utils.discover_models import DiscoverModels
 
 
 class BuildOrchestrator:
@@ -45,7 +45,12 @@ class BuildOrchestrator:
     - Save deployment_config from --additional-context
     """
 
-    def __init__(self, args, additional_context: Optional[Dict] = None, detect_local_gpu_arch: bool = False):
+    def __init__(
+        self,
+        args,
+        additional_context: Optional[Dict] = None,
+        detect_local_gpu_arch: bool = False,
+    ):
         """
         Initialize build orchestrator.
 
@@ -63,7 +68,7 @@ def __init__(self, args, additional_context: Optional[Dict] = None, detect_local
 
         # Merge additional_context from args and parameter
         merged_context = {}
-        
+
         # Load from file first if provided
         if hasattr(args, "additional_context_file") and args.additional_context_file:
             try:
@@ -71,7 +76,7 @@ def __init__(self, args, additional_context: Optional[Dict] = None, detect_local
                     merged_context = json.load(f)
             except (FileNotFoundError, json.JSONDecodeError) as e:
                 print(f"Warning: Could not load additional_context_file: {e}")
-        
+
         # Then merge string additional_context (overrides file)
         if hasattr(args, "additional_context") and args.additional_context:
             try:
@@ -79,6 +84,7 @@ def __init__(self, args, additional_context: Optional[Dict] = None, detect_local
                     # Use ast.literal_eval for Python dict syntax (single quotes)
                     # This matches what Context class expects
                     import ast
+
                     context_from_string = ast.literal_eval(args.additional_context)
                     merged_context.update(context_from_string)
                 elif isinstance(args.additional_context, dict):
@@ -95,31 +101,42 @@ def __init__(self, args, additional_context: Optional[Dict] = None, detect_local
         apply_build_context_defaults(merged_context)
 
         self.additional_context = merged_context
-        
+
         # Apply ConfigLoader to infer deploy type, validate, and apply defaults
         if self.additional_context:
             try:
                 from madengine.deployment.config_loader import ConfigLoader
+
                 # This will:
                 # 1. Infer deploy type from k8s/slurm presence
                 # 2. Validate for conflicts (e.g., both k8s and slurm)
                 # 3. Apply appropriate defaults
                 # 4. Add 'deploy' field for internal use
-                self.additional_context = ConfigLoader.load_config(self.additional_context)
+                self.additional_context = ConfigLoader.load_config(
+                    self.additional_context
+                )
             except ValueError as e:
                 # Re-raise as ConfigurationError so the CLI layer handles the exit code
                 raise ConfigurationError(str(e))
             except Exception as e:
                 # Other errors during config loading - warn but continue
-                self.rich_console.print(f"[yellow]Warning: Could not apply config defaults: {e}[/yellow]")
+                self.rich_console.print(
+                    f"[yellow]Warning: Could not apply config defaults: {e}[/yellow]"
+                )
 
         self.rich_console.print("[bold blue]Build additional context[/bold blue]\n")
-        self.rich_console.print(Panel(
-            json.dumps(self.additional_context, indent=2) if self.additional_context else "(empty)",
-            title="[bold]Context[/bold] (from --additional-context / --additional-context-file)",
-            border_style="dim",
-            padding=(0, 1),
-        ))
+        self.rich_console.print(
+            Panel(
+                (
+                    json.dumps(self.additional_context, indent=2)
+                    if self.additional_context
+                    else "(empty)"
+                ),
+                title="[bold]Context[/bold] (from --additional-context / --additional-context-file)",
+                border_style="dim",
+                padding=(0, 1),
+            )
+        )
         self.rich_console.print()
 
         # Initialize context in build-only mode (no GPU detection by default).
@@ -140,7 +157,7 @@ def __init__(self, args, additional_context: Optional[Dict] = None, detect_local
 
     def _copy_scripts(self):
         """[DEPRECATED] Copy common scripts to model directories.
-        
+
         This method is no longer called during build phase as it's not needed.
         Build phase only creates Docker images - script execution happens in run phase.
         Scripts are copied by run_orchestrator._copy_scripts() for local execution.
@@ -247,14 +264,18 @@ def execute(
             )
             self._warn_if_mad_arch_unresolved_for_dockerfiles(models, builder)
 
-            resolved_arch = self.context.ctx.get("docker_build_arg", {}).get("MAD_SYSTEM_GPU_ARCHITECTURE")
+            resolved_arch = self.context.ctx.get("docker_build_arg", {}).get(
+                "MAD_SYSTEM_GPU_ARCHITECTURE"
+            )
             if resolved_arch:
                 self.rich_console.print(
                     f"[green]✓ MAD_SYSTEM_GPU_ARCHITECTURE resolved: {resolved_arch}[/green]\n"
                 )
 
             # Step 3: Build Docker images
-            self.rich_console.print("[bold cyan]🏗️  Building Docker images...[/bold cyan]")
+            self.rich_console.print(
+                "[bold cyan]🏗️  Building Docker images...[/bold cyan]"
+            )
 
             # Determine phase suffix for log files
             # Build phase always uses .build suffix to avoid conflicts with run logs
@@ -302,8 +323,12 @@ def execute(
                     self.rich_console.print(f"  [red]• {model_name}: {error_msg}[/red]")
 
             # Step 4: ALWAYS generate manifest (even with partial failures)
-            self.rich_console.print("\n[bold cyan]📄 Generating build manifest...[/bold cyan]")
-            builder.export_build_manifest(manifest_output, registry, batch_build_metadata)
+            self.rich_console.print(
+                "\n[bold cyan]📄 Generating build manifest...[/bold cyan]"
+            )
+            builder.export_build_manifest(
+                manifest_output, registry, batch_build_metadata
+            )
 
             # Step 5: Save build summary to manifest
             self._save_build_summary(manifest_output, build_summary)
@@ -311,7 +336,9 @@ def execute(
             # Step 6: Save deployment_config to manifest
             self._save_deployment_config(manifest_output)
 
-            self.rich_console.print(f"[green]✓ Build complete: {manifest_output}[/green]")
+            self.rich_console.print(
+                f"[green]✓ Build complete: {manifest_output}[/green]"
+            )
             self.rich_console.print(f"[dim]{'=' * 60}[/dim]\n")
 
             # Step 7: Check if we should fail (only if ALL builds failed)
@@ -369,12 +396,16 @@ def _save_build_summary(self, manifest_file: str, build_summary: Dict):
                 json.dump(manifest, f, indent=2)
 
         except Exception as e:
-            self.rich_console.print(f"[yellow]Warning: Could not save build summary: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]Warning: Could not save build summary: {e}[/yellow]"
+            )
 
     def _save_deployment_config(self, manifest_file: str):
         """Save deployment_config from --additional-context to manifest."""
         if not self.additional_context:
-            self.rich_console.print("[dim]No additional_context provided, skipping deployment config[/dim]")
+            self.rich_console.print(
+                "[dim]No additional_context provided, skipping deployment config[/dim]"
+            )
             return
 
         try:
@@ -388,18 +419,22 @@ def _save_deployment_config(self, manifest_file: str):
                 # Auto-detect based on config presence
                 if self.additional_context.get("slurm"):
                     target = "slurm"
-                elif self.additional_context.get("k8s") or self.additional_context.get("kubernetes"):
+                elif self.additional_context.get("k8s") or self.additional_context.get(
+                    "kubernetes"
+                ):
                     target = "k8s"
                 else:
                     target = "local"
-            
+
             # Get env_vars and filter out MIOPEN_USER_DB_PATH
             # This variable must be set per-process in multi-GPU training to avoid database conflicts
             env_vars = self.additional_context.get("env_vars", {}).copy()
             if "MIOPEN_USER_DB_PATH" in env_vars:
                 del env_vars["MIOPEN_USER_DB_PATH"]
-                print("ℹ️  Filtered MIOPEN_USER_DB_PATH from env_vars (will be set per-process in training)")
-            
+                print(
+                    "ℹ️  Filtered MIOPEN_USER_DB_PATH from env_vars (will be set per-process in training)"
+                )
+
             deployment_config = {
                 "target": target,
                 "slurm": self.additional_context.get("slurm"),
@@ -416,17 +451,25 @@ def _save_deployment_config(self, manifest_file: str):
                 k: v for k, v in deployment_config.items() if v is not None
             }
 
-            if deployment_config and deployment_config != {"target": "local", "env_vars": {}}:
+            if deployment_config and deployment_config != {
+                "target": "local",
+                "env_vars": {},
+            }:
                 manifest["deployment_config"] = deployment_config
 
                 with open(manifest_file, "w") as f:
                     json.dump(manifest, f, indent=2)
 
-                self.rich_console.print(f"[green]✓ Saved deployment config to {manifest_file}[/green]")
+                self.rich_console.print(
+                    f"[green]✓ Saved deployment config to {manifest_file}[/green]"
+                )
             else:
-                self.rich_console.print("[dim]No deployment config to save (local execution)[/dim]")
+                self.rich_console.print(
+                    "[dim]No deployment config to save (local execution)[/dim]"
+                )
 
         except Exception as e:
             # Non-fatal - just warn
-            self.rich_console.print(f"[yellow]Warning: Could not save deployment config: {e}[/yellow]")
-
+            self.rich_console.print(
+                f"[yellow]Warning: Could not save deployment config: {e}[/yellow]"
+            )
diff --git a/src/madengine/orchestration/image_filtering.py b/src/madengine/orchestration/image_filtering.py
index 88a8725e..5026f35e 100644
--- a/src/madengine/orchestration/image_filtering.py
+++ b/src/madengine/orchestration/image_filtering.py
@@ -43,13 +43,19 @@ def filter_images_by_gpu_compatibility(
                     compatible[model_name] = image_info
                 else:
                     skipped.append(
-                        (model_name, f"architecture mismatch ({image_arch} != {runtime_gpu_arch})")
+                        (
+                            model_name,
+                            f"architecture mismatch ({image_arch} != {runtime_gpu_arch})",
+                        )
                     )
             else:
                 compatible[model_name] = image_info
         else:
             skipped.append(
-                (model_name, f"GPU vendor mismatch ({image_gpu_vendor} != {runtime_gpu_vendor})")
+                (
+                    model_name,
+                    f"GPU vendor mismatch ({image_gpu_vendor} != {runtime_gpu_vendor})",
+                )
             )
 
     return compatible, skipped
diff --git a/src/madengine/orchestration/run_orchestrator.py b/src/madengine/orchestration/run_orchestrator.py
index 6742b2a5..bdd7b13d 100644
--- a/src/madengine/orchestration/run_orchestrator.py
+++ b/src/madengine/orchestration/run_orchestrator.py
@@ -68,33 +68,44 @@ def __init__(self, args, additional_context: Optional[Dict] = None):
                     # Use ast.literal_eval for Python dict syntax (single quotes)
                     # This matches what Context class expects
                     import ast
+
                     parsed = ast.literal_eval(args.additional_context)
                     merged_context = parsed if isinstance(parsed, dict) else {}
                 elif isinstance(args.additional_context, dict):
                     merged_context = args.additional_context
             except (ValueError, SyntaxError) as e:
-                self.rich_console.print(f"[yellow]Warning: Could not parse additional_context: {e}[/yellow]")
+                self.rich_console.print(
+                    f"[yellow]Warning: Could not parse additional_context: {e}[/yellow]"
+                )
                 if args.additional_context:
-                    self.rich_console.print(f"[dim]Raw (first 200 chars): {str(args.additional_context)[:200]}[/dim]")
+                    self.rich_console.print(
+                        f"[dim]Raw (first 200 chars): {str(args.additional_context)[:200]}[/dim]"
+                    )
                 pass
 
         if additional_context:
             merged_context.update(additional_context)
 
         self.additional_context = merged_context
-        keys_str = ", ".join(sorted(self.additional_context.keys())) if self.additional_context else "(none)"
-        self.rich_console.print(f"[dim]Run additional context (CLI):[/dim] [cyan]{keys_str}[/cyan]")
+        keys_str = (
+            ", ".join(sorted(self.additional_context.keys()))
+            if self.additional_context
+            else "(none)"
+        )
+        self.rich_console.print(
+            f"[dim]Run additional context (CLI):[/dim] [cyan]{keys_str}[/cyan]"
+        )
 
         # Track if we copied MODEL_DIR contents (for cleanup)
         self._copied_from_model_dir = False
-        
+
         # Track if we ran build phase in this workflow (for log combination)
         self._did_build_phase = False
-        
+
         # Initialize session tracker for filtering current run results
         perf_csv_path = getattr(args, "output", "perf.csv")
         self.session_tracker = SessionTracker(perf_csv_path)
-        
+
         # Initialize context in runtime mode (with GPU detection for local)
         # This will be lazy-initialized only when needed
         self.context = None
@@ -104,14 +115,14 @@ def _init_runtime_context(self):
         """Initialize runtime context (with GPU detection)."""
         # Always reinitialize context in runtime mode for run phase
         # This ensures GPU detection and proper runtime context even after build phase
-        
+
         # Context expects additional_context as a string representation of Python dict
         # Use repr() instead of json.dumps() because Context uses ast.literal_eval()
         if self.additional_context:
             context_string = repr(self.additional_context)
         else:
             context_string = None
-            
+
         self.context = Context(
             additional_context=context_string,
             build_only_mode=False,
@@ -171,7 +182,7 @@ def execute(
             mad_container_image = None
             if self.additional_context:
                 mad_container_image = self.additional_context.get("MAD_CONTAINER_IMAGE")
-            
+
             if mad_container_image:
                 # Local image mode: Skip build, create synthetic manifest
                 if not tags:
@@ -186,14 +197,16 @@ def execute(
                             "Example: --tags model_name --additional-context \"{'MAD_CONTAINER_IMAGE': 'rocm/tensorflow:latest'}\"",
                         ],
                     )
-                
+
                 # Generate synthetic manifest using the provided image
                 manifest_file = self._create_manifest_from_local_image(
                     image_name=mad_container_image,
                     tags=tags,
-                    manifest_output=getattr(self.args, "manifest_output", "build_manifest.json"),
+                    manifest_output=getattr(
+                        self.args, "manifest_output", "build_manifest.json"
+                    ),
                 )
-            
+
             # Step 1: Ensure we have a manifest (build if needed)
             elif not manifest_file or not os.path.exists(manifest_file):
                 if not tags:
@@ -209,7 +222,9 @@ def execute(
                         ],
                     )
 
-                self.rich_console.print("[cyan]No manifest found, building first...[/cyan]\n")
+                self.rich_console.print(
+                    "[cyan]No manifest found, building first...[/cyan]\n"
+                )
                 manifest_file = self._build_phase(tags, registry)
                 self._did_build_phase = True  # Mark that we built in this workflow
 
@@ -220,44 +235,66 @@ def execute(
             # (with optional runtime override)
             with open(manifest_file) as f:
                 manifest = json.load(f)
-            
+
             deployment_config = manifest.get("deployment_config", {})
-            
+
             # Update additional_context with deployment_config for deployment layer
             if not self.additional_context:
                 self.additional_context = {}
-            
+
             # Merge deployment_config into additional_context (for deployment layer to use)
-            for key in ["slurm", "k8s", "kubernetes", "distributed", "vllm", "env_vars", "debug"]:
+            for key in [
+                "slurm",
+                "k8s",
+                "kubernetes",
+                "distributed",
+                "vllm",
+                "env_vars",
+                "debug",
+            ]:
                 if key in deployment_config and key not in self.additional_context:
                     self.additional_context[key] = deployment_config[key]
-            
+
             # Display manifest entries: context (from build) and deployment_config (run/deploy)
             self.rich_console.print("[bold blue]Build manifest breakdown[/bold blue]\n")
             manifest_context = manifest.get("context", {})
-            self.rich_console.print(Panel(
-                json.dumps(manifest_context, indent=2) if manifest_context else "(empty)",
-                title="[bold]Manifest context[/bold] (from build additional context)",
-                border_style="dim",
-                padding=(0, 1),
-            ))
-            self.rich_console.print(Panel(
-                json.dumps(deployment_config, indent=2) if deployment_config else "(empty)",
-                title="[bold]Manifest deployment_config[/bold]",
-                border_style="dim",
-                padding=(0, 1),
-            ))
+            self.rich_console.print(
+                Panel(
+                    (
+                        json.dumps(manifest_context, indent=2)
+                        if manifest_context
+                        else "(empty)"
+                    ),
+                    title="[bold]Manifest context[/bold] (from build additional context)",
+                    border_style="dim",
+                    padding=(0, 1),
+                )
+            )
+            self.rich_console.print(
+                Panel(
+                    (
+                        json.dumps(deployment_config, indent=2)
+                        if deployment_config
+                        else "(empty)"
+                    ),
+                    title="[bold]Manifest deployment_config[/bold]",
+                    border_style="dim",
+                    padding=(0, 1),
+                )
+            )
             self.rich_console.print()
 
             # Infer deployment target from config structure (Convention over Configuration)
             # No explicit "deploy" field needed - presence of k8s/slurm indicates deployment type
             target = self._infer_deployment_target(self.additional_context)
-            
+
             # Legacy support: check manifest for explicit target
             if not target or target == "local":
                 target = deployment_config.get("target", "local")
-            
-            self.rich_console.print(f"[bold cyan]Deployment target: {target}[/bold cyan]\n")
+
+            self.rich_console.print(
+                f"[bold cyan]Deployment target: {target}[/bold cyan]\n"
+            )
 
             # Use `is True` so MagicMock-based test doubles do not count as enabled.
             skip_requested = getattr(self.args, "skip_model_run", False) is True
@@ -293,28 +330,34 @@ def execute(
                     results = self._execute_local(manifest_file, timeout)
                 else:
                     results = self._execute_distributed(target, manifest_file)
-                
+
                 # Combine build and run logs for full workflow
                 if self._did_build_phase and (target == "local" or target == "docker"):
                     self._combine_build_and_run_logs(manifest_file)
-                
+
                 # Add session information to results for filtering
                 results["session_start_row"] = session_start_row
-                results["session_row_count"] = self.session_tracker.get_session_row_count()
-                
+                results["session_row_count"] = (
+                    self.session_tracker.get_session_row_count()
+                )
+
                 # Always cleanup madengine package files after execution
-                self.rich_console.print("\n[dim]🧹 Cleaning up madengine package files...[/dim]")
+                self.rich_console.print(
+                    "\n[dim]🧹 Cleaning up madengine package files...[/dim]"
+                )
                 self._cleanup_model_dir_copies()
-                
+
                 # NOTE: Do NOT cleanup session marker here!
                 # It's needed by display functions in CLI layer
                 # Cleanup happens in CLI after display (via perf_csv_path)
-                
+
                 return results
-                
+
             except Exception as e:
                 # Always cleanup madengine package files even on error
-                self.rich_console.print("\n[dim]🧹 Cleaning up madengine package files...[/dim]")
+                self.rich_console.print(
+                    "\n[dim]🧹 Cleaning up madengine package files...[/dim]"
+                )
                 self._cleanup_model_dir_copies()
                 raise
 
@@ -360,56 +403,63 @@ def _build_phase(self, tags: list, registry: Optional[str] = None) -> str:
         return manifest_file
 
     def _create_manifest_from_local_image(
-        self, 
-        image_name: str, 
-        tags: list, 
-        manifest_output: str = "build_manifest.json"
+        self, image_name: str, tags: list, manifest_output: str = "build_manifest.json"
     ) -> str:
         """
         Create a synthetic manifest for a user-provided local image.
-        
+
         This enables MAD_CONTAINER_IMAGE functionality where users can skip
         the build phase and directly run models using a pre-existing Docker image.
-        
+
         Args:
             image_name: Docker image name/tag (e.g., 'rocm/tensorflow:latest')
             tags: Model tags to discover
             manifest_output: Output path for the manifest file
-            
+
         Returns:
             Path to the generated manifest file
-            
+
         Raises:
             DiscoveryError: If no models are found
             RuntimeError: If image validation fails
         """
         from madengine.utils.discover_models import DiscoverModels
         from madengine.core.errors import DiscoveryError
-        
-        self.rich_console.print(f"[yellow]🏠 Local Image Mode: Using {image_name}[/yellow]")
-        self.rich_console.print(f"[dim]Skipping build phase, creating synthetic manifest...[/dim]\n")
-        
+
+        self.rich_console.print(
+            f"[yellow]🏠 Local Image Mode: Using {image_name}[/yellow]"
+        )
+        self.rich_console.print(
+            f"[dim]Skipping build phase, creating synthetic manifest...[/dim]\n"
+        )
+
         # Validate that the image exists locally or can be pulled
         try:
             self.console.sh(f"docker image inspect {image_name} > /dev/null 2>&1")
-            self.rich_console.print(f"[green]✓ Image {image_name} found locally[/green]")
+            self.rich_console.print(
+                f"[green]✓ Image {image_name} found locally[/green]"
+            )
         except (subprocess.CalledProcessError, RuntimeError) as e:
-            self.rich_console.print(f"[yellow]⚠️  Image {image_name} not found locally, attempting to pull...[/yellow]")
+            self.rich_console.print(
+                f"[yellow]⚠️  Image {image_name} not found locally, attempting to pull...[/yellow]"
+            )
             try:
                 self.console.sh(f"docker pull {image_name}")
-                self.rich_console.print(f"[green]✓ Successfully pulled {image_name}[/green]")
+                self.rich_console.print(
+                    f"[green]✓ Successfully pulled {image_name}[/green]"
+                )
             except Exception as e:
                 raise RuntimeError(
                     f"Failed to find or pull image {image_name}. "
                     f"Ensure the image exists locally or can be pulled from a registry. "
                     f"Error: {e}"
                 )
-        
+
         # Discover models by tags (without building)
         self.args.tags = tags
         discover_models = DiscoverModels(args=self.args)
         models = discover_models.run()
-        
+
         if not models:
             raise DiscoveryError(
                 "No models discovered for local image mode",
@@ -423,17 +473,21 @@ def _create_manifest_from_local_image(
                     "Ensure model definitions have matching tags",
                 ],
             )
-        
-        self.rich_console.print(f"[green]✓ Discovered {len(models)} model(s) for tags: {tags}[/green]\n")
-        
+
+        self.rich_console.print(
+            f"[green]✓ Discovered {len(models)} model(s) for tags: {tags}[/green]\n"
+        )
+
         # Initialize build-only context for manifest generation
         # (we need context structure, but skip GPU detection since we're not building)
-        context_string = repr(self.additional_context) if self.additional_context else None
+        context_string = (
+            repr(self.additional_context) if self.additional_context else None
+        )
         build_context = Context(
             additional_context=context_string,
             build_only_mode=True,
         )
-        
+
         # Create manifest structure
         manifest = {
             "built_images": {},
@@ -443,13 +497,13 @@ def _create_manifest_from_local_image(
             "local_image_name": image_name,
             "deployment_config": self.additional_context.get("deployment_config", {}),
         }
-        
+
         # For each model, create a synthetic entry using the provided image
         for model in models:
             model_name = model["name"]
             # Create a synthetic image identifier (not an actual built image)
             synthetic_image_id = f"local-{model_name.replace('/', '_')}"
-            
+
             manifest["built_images"][synthetic_image_id] = {
                 "docker_image": image_name,  # Use user-provided image
                 "dockerfile": "N/A (local image mode)",
@@ -458,22 +512,26 @@ def _create_manifest_from_local_image(
                 "local_image": True,
                 "registry_image": None,
             }
-            
+
             # Convert data list to comma-separated string (required by dataprovider)
             data_field = model.get("data", [])
             if isinstance(data_field, list):
                 data_str = ",".join(data_field) if data_field else ""
             else:
                 data_str = data_field if data_field else ""
-            
+
             # Build model info dict with all fields that ContainerRunner expects
             # Use exact field names from models.json format
             manifest["built_models"][synthetic_image_id] = {
                 "name": model_name,
                 "tags": model.get("tags", []),
                 "dockerfile": "N/A (local image mode)",
-                "scripts": model.get("scripts", ""),  # models.json uses "scripts" (plural)
-                "n_gpus": model.get("n_gpus", "1"),  # models.json uses "n_gpus" (string format)
+                "scripts": model.get(
+                    "scripts", ""
+                ),  # models.json uses "scripts" (plural)
+                "n_gpus": model.get(
+                    "n_gpus", "1"
+                ),  # models.json uses "n_gpus" (string format)
                 "owner": model.get("owner", ""),
                 "training_precision": model.get("training_precision", ""),
                 "args": model.get("args", ""),  # Required field for docker run
@@ -482,16 +540,22 @@ def _create_manifest_from_local_image(
                 "cred": model.get("cred", ""),
                 "deprecated": model.get("deprecated", False),
                 "skip_gpu_arch": model.get("skip_gpu_arch", []),
-                "additional_docker_run_options": model.get("additional_docker_run_options", ""),
+                "additional_docker_run_options": model.get(
+                    "additional_docker_run_options", ""
+                ),
             }
-        
+
         # Write manifest to file
         with open(manifest_output, "w") as f:
             json.dump(manifest, f, indent=2)
-        
-        self.rich_console.print(f"[green]✓ Generated synthetic manifest: {manifest_output}[/green]")
-        self.rich_console.print(f"[yellow]⚠️  Warning: User-provided image {image_name}. Model support not guaranteed.[/yellow]\n")
-        
+
+        self.rich_console.print(
+            f"[green]✓ Generated synthetic manifest: {manifest_output}[/green]"
+        )
+        self.rich_console.print(
+            f"[yellow]⚠️  Warning: User-provided image {image_name}. Model support not guaranteed.[/yellow]\n"
+        )
+
         return manifest_output
 
     def _load_and_merge_manifest(self, manifest_file: str) -> str:
@@ -510,22 +574,31 @@ def _load_and_merge_manifest(self, manifest_file: str) -> str:
             if "deployment_config" in manifest:
                 stored_config = manifest["deployment_config"]
                 # Runtime --additional-context overrides stored config
-                for key in ["deploy", "slurm", "k8s", "kubernetes", "distributed", "vllm", "env_vars", "debug"]:
+                for key in [
+                    "deploy",
+                    "slurm",
+                    "k8s",
+                    "kubernetes",
+                    "distributed",
+                    "vllm",
+                    "env_vars",
+                    "debug",
+                ]:
                     if key in self.additional_context:
                         stored_config[key] = self.additional_context[key]
                 manifest["deployment_config"] = stored_config
-            
+
             # Merge context (tools, pre_scripts, post_scripts, encapsulate_script)
             if "context" not in manifest:
                 manifest["context"] = {}
-            
+
             merge_keys = ["tools", "pre_scripts", "post_scripts", "encapsulate_script"]
             context_updated = False
             for key in merge_keys:
                 if key in self.additional_context:
                     manifest["context"][key] = self.additional_context[key]
                     context_updated = True
-            
+
             if context_updated or "deployment_config" in manifest:
                 # Write back merged config
                 with open(manifest_file, "w") as f:
@@ -541,16 +614,18 @@ def _execute_local(self, manifest_file: str, timeout: int) -> Dict:
         # Load manifest first to check if we have Docker images
         with open(manifest_file, "r") as f:
             manifest = json.load(f)
-        
+
         has_docker_images = bool(manifest.get("built_images", {}))
-        
+
         if has_docker_images:
             # Using Docker containers - containers have GPU support built-in
-            self.rich_console.print("[dim cyan]Using Docker containers with built-in GPU support[/dim cyan]\n")
-        
+            self.rich_console.print(
+                "[dim cyan]Using Docker containers with built-in GPU support[/dim cyan]\n"
+            )
+
         # Initialize runtime context (runs full GPU detection on compute nodes)
         self._init_runtime_context()
-        
+
         # Show node info
         self._show_node_info()
 
@@ -570,9 +645,14 @@ def _execute_local(self, manifest_file: str, timeout: int) -> Dict:
             if "post_scripts" in manifest_context:
                 self.context.ctx["post_scripts"] = manifest_context["post_scripts"]
             if "encapsulate_script" in manifest_context:
-                self.context.ctx["encapsulate_script"] = manifest_context["encapsulate_script"]
+                self.context.ctx["encapsulate_script"] = manifest_context[
+                    "encapsulate_script"
+                ]
             # Restore docker_env_vars from build context (e.g. MAD_SECRET_HFTOKEN for Primus HF-backed configs)
-            if "docker_env_vars" in manifest_context and manifest_context["docker_env_vars"]:
+            if (
+                "docker_env_vars" in manifest_context
+                and manifest_context["docker_env_vars"]
+            ):
                 if "docker_env_vars" not in self.context.ctx:
                     self.context.ctx["docker_env_vars"] = {}
                 for k, v in manifest_context["docker_env_vars"].items():
@@ -589,9 +669,13 @@ def _execute_local(self, manifest_file: str, timeout: int) -> Dict:
             if "pre_scripts" in self.additional_context:
                 self.context.ctx["pre_scripts"] = self.additional_context["pre_scripts"]
             if "post_scripts" in self.additional_context:
-                self.context.ctx["post_scripts"] = self.additional_context["post_scripts"]
+                self.context.ctx["post_scripts"] = self.additional_context[
+                    "post_scripts"
+                ]
             if "encapsulate_script" in self.additional_context:
-                self.context.ctx["encapsulate_script"] = self.additional_context["encapsulate_script"]
+                self.context.ctx["encapsulate_script"] = self.additional_context[
+                    "encapsulate_script"
+                ]
 
         # Filter images by GPU vendor and architecture
         # Filter images by GPU compatibility
@@ -604,10 +688,14 @@ def _execute_local(self, manifest_file: str, timeout: int) -> Dict:
 
             if has_docker_images:
                 # Docker images: filter by GPU vendor at runtime to avoid cross-vendor execution
-                self.rich_console.print("[dim cyan]Filtering Docker images by runtime GPU compatibility...[/dim cyan]")
+                self.rich_console.print(
+                    "[dim cyan]Filtering Docker images by runtime GPU compatibility...[/dim cyan]"
+                )
             else:
                 # Bare-metal execution: filter by runtime GPU
-                self.rich_console.print("[dim cyan]Filtering bare-metal images by runtime GPU compatibility...[/dim cyan]")
+                self.rich_console.print(
+                    "[dim cyan]Filtering bare-metal images by runtime GPU compatibility...[/dim cyan]"
+                )
 
             compatible_images = self._filter_images_by_gpu_compatibility(
                 manifest["built_images"], runtime_gpu_vendor, runtime_gpu_arch
@@ -629,30 +717,37 @@ def _execute_local(self, manifest_file: str, timeout: int) -> Dict:
 
             manifest["built_images"] = compatible_images
             print(f"Filtered to {len(compatible_images)} compatible images\n")
-            
+
             # Filter by skip_gpu_arch from model definitions (applies to both Docker and bare-metal)
             runtime_gpu_arch = self.context.get_system_gpu_architecture()
             if "built_models" in manifest and compatible_images:
-                self.rich_console.print("[cyan]Checking skip_gpu_arch model restrictions...[/cyan]")
+                self.rich_console.print(
+                    "[cyan]Checking skip_gpu_arch model restrictions...[/cyan]"
+                )
                 compatible_images = self._filter_images_by_skip_gpu_arch(
                     compatible_images, manifest["built_models"], runtime_gpu_arch
                 )
             manifest["built_images"] = compatible_images
-            print(f"After skip_gpu_arch filtering: {len(compatible_images)} images to run\n")
-            
+            print(
+                f"After skip_gpu_arch filtering: {len(compatible_images)} images to run\n"
+            )
+
             # NOTE: Dockerfile context filtering is already done during build phase
             # Re-filtering during run phase causes issues because:
             # 1. The build phase already filtered dockerfiles based on build-time context
             # 2. All built images should be runnable on the runtime node
             # 3. Legacy behavior: filtering happens once (either build or run, not both)
-            
+
             # Write filtered manifest back to file so runner sees the filtered list
             with open(manifest_file, "w") as f:
                 json.dump(manifest, f, indent=2)
 
         except Exception as e:
             import traceback
-            self.rich_console.print(f"[yellow]Warning: GPU/Context filtering failed: {e}[/yellow]")
+
+            self.rich_console.print(
+                f"[yellow]Warning: GPU/Context filtering failed: {e}[/yellow]"
+            )
             self.rich_console.print(f"[red]Traceback: {traceback.format_exc()}[/red]")
             self.rich_console.print("[yellow]Proceeding with all images[/yellow]\n")
 
@@ -701,8 +796,10 @@ def _execute_distributed(self, target: str, manifest_file: str) -> Dict:
 
         # Add runtime flags to additional_context for deployment layer
         if "live_output" not in self.additional_context:
-            self.additional_context["live_output"] = getattr(self.args, "live_output", False)
-        
+            self.additional_context["live_output"] = getattr(
+                self.args, "live_output", False
+            )
+
         # Pass session_start_row for result filtering in collect_results
         session_start_row = self.session_tracker.session_start_row
         if "session_start_row" not in self.additional_context:
@@ -757,37 +854,39 @@ def _show_node_info(self):
         elif "HOST_AZURE" in host_os:
             print(self.console.sh("tdnf info rocm-libs", canFail=True))
         else:
-            self.rich_console.print("[yellow]Warning: Unable to detect host OS[/yellow]")
+            self.rich_console.print(
+                "[yellow]Warning: Unable to detect host OS[/yellow]"
+            )
 
     def _cleanup_model_dir_copies(self):
         """Clean up only madengine package files from scripts/common directory.
-        
+
         This cleanup removes ONLY the files that were copied from madengine package:
         - scripts/common/tools.json
         - scripts/common/test_echo.sh
         - scripts/common/pre_scripts/
         - scripts/common/post_scripts/
         - scripts/common/tools/
-        
+
         This preserves the user's actual scripts/ and docker/ directories in MAD project.
         """
         import shutil
         import subprocess
-        
+
         # Only clean up scripts/common/ subdirectories that came from madengine package
         common_dir = Path("scripts/common")
         if not common_dir.exists():
             return
-        
+
         # List of items to clean up (from madengine package)
         items_to_cleanup = [
             "tools.json",
             "test_echo.sh",
             "pre_scripts",
             "post_scripts",
-            "tools"
+            "tools",
         ]
-        
+
         for item_name in items_to_cleanup:
             item_path = common_dir / item_name
             if item_path.exists():
@@ -798,14 +897,20 @@ def _cleanup_model_dir_copies(self):
                             subprocess.run(
                                 ["chmod", "-R", "+w", str(item_path)],
                                 capture_output=True,
-                                timeout=10
+                                timeout=10,
                             )
-                        except (subprocess.TimeoutExpired, subprocess.CalledProcessError, OSError) as e:
+                        except (
+                            subprocess.TimeoutExpired,
+                            subprocess.CalledProcessError,
+                            OSError,
+                        ) as e:
                             print(f"Warning: chmod failed for {item_path}: {e}")
                         shutil.rmtree(item_path)
                     else:
                         item_path.unlink()
-                    self.rich_console.print(f"[dim]  Cleaned up: scripts/common/{item_name}[/dim]")
+                    self.rich_console.print(
+                        f"[dim]  Cleaned up: scripts/common/{item_name}[/dim]"
+                    )
                 except Exception as e:
                     # Try with sudo for permission issues
                     try:
@@ -813,9 +918,11 @@ def _cleanup_model_dir_copies(self):
                             ["sudo", "rm", "-rf", str(item_path)],
                             check=True,
                             capture_output=True,
-                            timeout=10
+                            timeout=10,
+                        )
+                        self.rich_console.print(
+                            f"[dim]  Cleaned up: scripts/common/{item_name} (elevated)[/dim]"
                         )
-                        self.rich_console.print(f"[dim]  Cleaned up: scripts/common/{item_name} (elevated)[/dim]")
                     except Exception as e2:
                         self.rich_console.print(
                             f"[yellow]⚠️  Warning: Could not clean up {item_path}: {e2}[/yellow]"
@@ -823,84 +930,88 @@ def _cleanup_model_dir_copies(self):
 
     def _combine_build_and_run_logs(self, manifest_file: str):
         """Combine build.live.log and run.live.log into live.log for full workflow.
-        
+
         For full workflow (build + run), this creates a unified log file by:
         1. Reading the manifest to find models that were actually executed in this session
         2. Finding corresponding *.build.live.log and *.run.live.log files for those models
         3. Concatenating them into *.live.log
         4. Keeping the original build and run logs for reference
-        
+
         Args:
             manifest_file: Path to the manifest file containing executed models
         """
         import json
-        
+
         # Load manifest to get list of build log files
         try:
             with open(manifest_file, "r") as f:
                 manifest = json.load(f)
-            
+
             built_images = manifest.get("built_images", {})
             if not built_images:
                 return  # No models to process
         except Exception as e:
-            self.rich_console.print(f"[yellow]⚠️  Warning: Could not load manifest for log combining: {e}[/yellow]")
+            self.rich_console.print(
+                f"[yellow]⚠️  Warning: Could not load manifest for log combining: {e}[/yellow]"
+            )
             return
-        
+
         self.rich_console.print("\n[dim]📝 Combining build and run logs...[/dim]")
         combined_count = 0
-        
+
         # Process each built image
         for image_name, image_info in built_images.items():
             # Get build log file name from manifest
             build_log = image_info.get("log_file")
             if not build_log or not os.path.exists(build_log):
                 continue  # Skip if build log doesn't exist
-            
+
             # Derive the base name and corresponding run log
             base_name = build_log.replace(".build.live.log", "")
             run_log = f"{base_name}.run.live.log"
             combined_log = f"{base_name}.live.log"
-            
+
             # Check if run log exists
             if not os.path.exists(run_log):
                 continue  # Skip if run log doesn't exist
-            
+
             try:
                 # Combine build and run logs
-                with open(combined_log, 'w') as outfile:
+                with open(combined_log, "w") as outfile:
                     # Add build log
-                    with open(build_log, 'r') as infile:
+                    with open(build_log, "r") as infile:
                         outfile.write(infile.read())
-                    
+
                     # Add separator
                     outfile.write("\n" + "=" * 80 + "\n")
                     outfile.write("RUN PHASE LOG\n")
                     outfile.write("=" * 80 + "\n\n")
-                    
+
                     # Add run log
-                    with open(run_log, 'r') as infile:
+                    with open(run_log, "r") as infile:
                         outfile.write(infile.read())
-                
+
                 combined_count += 1
                 self.rich_console.print(f"[dim]  Combined: {combined_log}[/dim]")
-                
+
             except Exception as e:
                 self.rich_console.print(
                     f"[yellow]⚠️  Warning: Could not combine logs for {base_name}: {e}[/yellow]"
                 )
-        
+
         if combined_count > 0:
-            self.rich_console.print(f"[dim]✓ Combined {combined_count} log file(s)[/dim]")
+            self.rich_console.print(
+                f"[dim]✓ Combined {combined_count} log file(s)[/dim]"
+            )
 
     def _copy_scripts(self):
         """Copy common scripts to model directories.
-        
+
         Handles scenarios:
         1. MAD Project: scripts/ already exists in current directory - just add madengine common files
         2. External MODEL_DIR: Copy from external path to current directory
         3. madengine Testing: Copy from src/madengine/scripts/common
-        
+
         NOTE: Does NOT delete existing scripts/ or docker/ directories in current working directory.
         """
         import shutil
@@ -908,19 +1019,27 @@ def _copy_scripts(self):
         # Define ignore function for cache files (used for all copy operations)
         def ignore_cache_files(directory, files):
             """Ignore Python cache files and directories."""
-            return [f for f in files if f.endswith('.pyc') or f == '__pycache__' or f.endswith('.pyo')]
-        
+            return [
+                f
+                for f in files
+                if f.endswith(".pyc") or f == "__pycache__" or f.endswith(".pyo")
+            ]
+
         # Step 1: Check if MODEL_DIR points to external directory and copy if needed
         # MODEL_DIR default is "." (current directory), so only copy if it's different
         model_dir_env = os.environ.get("MODEL_DIR", ".")
         model_dir_abs = os.path.abspath(model_dir_env)
         current_dir_abs = os.path.abspath(".")
-        
+
         # Only copy if MODEL_DIR points to a different directory (not current dir)
         if model_dir_abs != current_dir_abs and os.path.exists(model_dir_env):
-            self.rich_console.print(f"[yellow]📁 External MODEL_DIR detected: {model_dir_env}[/yellow]")
-            self.rich_console.print("[yellow]Copying MODEL_DIR contents for run phase...[/yellow]")
-            
+            self.rich_console.print(
+                f"[yellow]📁 External MODEL_DIR detected: {model_dir_env}[/yellow]"
+            )
+            self.rich_console.print(
+                "[yellow]Copying MODEL_DIR contents for run phase...[/yellow]"
+            )
+
             # Copy docker/ and scripts/ from MODEL_DIR (without deleting existing ones first)
             for subdir in ["docker", "scripts"]:
                 src_path = Path(model_dir_env) / subdir
@@ -929,18 +1048,29 @@ def ignore_cache_files(directory, files):
                     # Use copytree with dirs_exist_ok=True to merge instead of replace
                     if dest_path.exists():
                         # Only warn, don't delete existing directories
-                        self.rich_console.print(f"[dim]  Note: Merging {subdir}/ from MODEL_DIR with existing directory[/dim]")
-                    shutil.copytree(src_path, dest_path, dirs_exist_ok=True, ignore=ignore_cache_files)
-            
-            self.rich_console.print("[green]✓ MODEL_DIR structure copied (docker/, scripts/)[/green]")
+                        self.rich_console.print(
+                            f"[dim]  Note: Merging {subdir}/ from MODEL_DIR with existing directory[/dim]"
+                        )
+                    shutil.copytree(
+                        src_path,
+                        dest_path,
+                        dirs_exist_ok=True,
+                        ignore=ignore_cache_files,
+                    )
+
+            self.rich_console.print(
+                "[green]✓ MODEL_DIR structure copied (docker/, scripts/)[/green]"
+            )
         elif not os.path.exists(model_dir_env):
-            self.rich_console.print(f"[yellow]⚠️  Warning: MODEL_DIR '{model_dir_env}' does not exist, using current directory[/yellow]")
+            self.rich_console.print(
+                f"[yellow]⚠️  Warning: MODEL_DIR '{model_dir_env}' does not exist, using current directory[/yellow]"
+            )
 
         # Step 2: Copy madengine's common scripts (pre_scripts, post_scripts, tools)
         # This provides the execution framework scripts
         # Find madengine installation path (works for both development and installed package)
         madengine_common = None
-        
+
         # Option 1: Development mode - check if running from source
         dev_path = Path("src/madengine/scripts/common")
         if dev_path.exists():
@@ -950,23 +1080,34 @@ def ignore_cache_files(directory, files):
             # Option 2: Installed package - find via module location
             try:
                 import madengine
+
                 madengine_module_path = Path(madengine.__file__).parent
                 installed_path = madengine_module_path / "scripts" / "common"
                 if installed_path.exists():
                     madengine_common = installed_path
-                    print(f"Found madengine scripts in installed package: {madengine_common}")
+                    print(
+                        f"Found madengine scripts in installed package: {madengine_common}"
+                    )
             except Exception as e:
                 print(f"Could not locate madengine scripts: {e}")
-        
+
         if madengine_common and madengine_common.exists():
-            print(f"Copying madengine common scripts from {madengine_common} to scripts/common")
-            
+            print(
+                f"Copying madengine common scripts from {madengine_common} to scripts/common"
+            )
+
             dest_common = Path("scripts/common")
             # Ensure the destination directory exists before copying
             dest_common.mkdir(parents=True, exist_ok=True)
-            
+
             # Copy pre_scripts, post_scripts, tools if they exist
-            for item in ["pre_scripts", "post_scripts", "tools", "tools.json", "test_echo.sh"]:
+            for item in [
+                "pre_scripts",
+                "post_scripts",
+                "tools",
+                "tools.json",
+                "test_echo.sh",
+            ]:
                 src_item = madengine_common / item
                 if src_item.exists():
                     dest_item = dest_common / item
@@ -975,19 +1116,21 @@ def ignore_cache_files(directory, files):
                             shutil.rmtree(dest_item)
                         else:
                             dest_item.unlink()
-                    
+
                     if src_item.is_dir():
                         shutil.copytree(src_item, dest_item, ignore=ignore_cache_files)
                     else:
                         shutil.copy2(src_item, dest_item)
                     print(f"  Copied {item}")
         else:
-            self.rich_console.print("[yellow]⚠️  Could not find madengine scripts directory[/yellow]")
+            self.rich_console.print(
+                "[yellow]⚠️  Could not find madengine scripts directory[/yellow]"
+            )
 
         # Step 3: REMOVED - Distribution to model directories is incorrect
         # scripts/common should remain at <cwd>/scripts/common/ for proper relative path access
         # Model scripts reference it via ../scripts/common/ from their directory (e.g., scripts/dummy/)
-        # 
+        #
         # This ensures compatibility with legacy workflow where:
         # - scripts/common/ stays at working directory root
         # - Model scripts use ../scripts/common/ relative paths
@@ -1008,7 +1151,9 @@ def _filter_images_by_gpu_compatibility(
                 )
                 compatible_images[model_name] = image_info
                 continue
-        built_with_vendor = {k: v for k, v in built_images.items() if v.get("gpu_vendor")}
+        built_with_vendor = {
+            k: v for k, v in built_images.items() if v.get("gpu_vendor")
+        }
         compat, skipped = _filter_by_gpu_compat(
             built_with_vendor, runtime_gpu_vendor, runtime_gpu_arch
         )
@@ -1016,7 +1161,7 @@ def _filter_images_by_gpu_compatibility(
         for model_name, reason in skipped:
             self.rich_console.print(f"[dim]  Skipping {model_name}: {reason}[/dim]")
         return compatible_images
-    
+
     def _filter_images_by_gpu_architecture(
         self, built_images: Dict, runtime_gpu_arch: str
     ) -> Dict:
@@ -1047,9 +1192,11 @@ def _filter_images_by_skip_gpu_arch(
             self._write_skipped_status(model_name, image_info, gpu_arch)
         return compatible_images
 
-    def _write_skipped_status(self, model_name: str, image_info: Dict, gpu_arch: str) -> None:
+    def _write_skipped_status(
+        self, model_name: str, image_info: Dict, gpu_arch: str
+    ) -> None:
         """Write SKIPPED status to perf CSV for models that were skipped.
-        
+
         Args:
             model_name: Name of the model that was skipped
             image_info: Image information dictionary
@@ -1059,7 +1206,7 @@ def _write_skipped_status(self, model_name: str, image_info: Dict, gpu_arch: str
             from madengine.reporting.update_perf_csv import update_perf_csv
             import json
             import tempfile
-            
+
             # Create a perf entry for the skipped model
             perf_entry = {
                 "model": model_name,
@@ -1067,37 +1214,42 @@ def _write_skipped_status(self, model_name: str, image_info: Dict, gpu_arch: str
                 "reason": f"Model not supported on {gpu_arch} architecture",
                 "gpu_architecture": gpu_arch,
             }
-            
+
             # Write to temporary JSON file
-            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+            with tempfile.NamedTemporaryFile(
+                mode="w", suffix=".json", delete=False
+            ) as f:
                 json.dump(perf_entry, f)
                 temp_file = f.name
-            
+
             # Get output CSV path from args
-            output_csv = getattr(self.args, 'output', 'perf.csv')
-            
+            output_csv = getattr(self.args, "output", "perf.csv")
+
             # Update perf CSV with skipped entry
             update_perf_csv(exception_result=temp_file, perf_csv=output_csv)
-            
+
             # Clean up temp file
             import os
+
             os.unlink(temp_file)
-            
+
         except Exception as e:
-            self.rich_console.print(f"[dim]  Warning: Could not write SKIPPED status to CSV: {e}[/dim]")
+            self.rich_console.print(
+                f"[dim]  Warning: Could not write SKIPPED status to CSV: {e}[/dim]"
+            )
 
     def _infer_deployment_target(self, config: Dict) -> str:
         """
         Infer deployment target from configuration structure.
-        
+
         Convention over Configuration:
         - Presence of "k8s" or "kubernetes" field → k8s deployment
         - Presence of "slurm" field → slurm deployment
         - Neither present → local execution
-        
+
         Args:
             config: Configuration dictionary
-            
+
         Returns:
             Deployment target: "k8s", "slurm", or "local"
         """
@@ -1107,5 +1259,3 @@ def _infer_deployment_target(self, config: Dict) -> str:
             return "slurm"
         else:
             return "local"
-    
-
diff --git a/src/madengine/reporting/__init__.py b/src/madengine/reporting/__init__.py
index af8ef4ae..8152a5cf 100644
--- a/src/madengine/reporting/__init__.py
+++ b/src/madengine/reporting/__init__.py
@@ -4,11 +4,11 @@
 Reporting modules for madengine including performance CSV and superset generation.
 """
 
-from .update_perf_csv import PERF_CSV_HEADER, update_perf_csv, flatten_tags
+from .update_perf_csv import PERF_CSV_HEADER, flatten_tags, update_perf_csv
 from .update_perf_super import (
-    update_perf_super_json,
-    update_perf_super_csv,
     convert_super_json_to_csv,
+    update_perf_super_csv,
+    update_perf_super_json,
 )
 
 __all__ = [
@@ -19,4 +19,3 @@
     "update_perf_super_csv",
     "convert_super_json_to_csv",
 ]
-
diff --git a/src/madengine/reporting/csv_to_email.py b/src/madengine/reporting/csv_to_email.py
index 4b21bc17..91835b92 100644
--- a/src/madengine/reporting/csv_to_email.py
+++ b/src/madengine/reporting/csv_to_email.py
@@ -6,9 +6,9 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-import os
 import argparse
 import logging
+import os
 from typing import List, Optional, Tuple
 
 import pandas as pd
@@ -27,7 +27,7 @@ def find_csv_files(directory: str) -> List[str]:
     """
     csv_files = []
     for filename in os.listdir(directory):
-        if filename.endswith('.csv'):
+        if filename.endswith(".csv"):
             csv_files.append(os.path.join(directory, filename))
     return sorted(csv_files)
 
@@ -43,23 +43,22 @@ def csv_to_html_section(file_path: str) -> Tuple[str, str]:
     """
     # Read the CSV file
     df = pd.read_csv(file_path)
-    
+
     # Get section name from file path
     base_name = os.path.basename(file_path)
     section_name = os.path.splitext(base_name)[0]
-    
+
     # Convert DataFrame to HTML
     html_table = df.to_html(index=False)
-    
+
     # Create HTML section with header
     html_section = f"<h2>{section_name}</h2>\n{html_table}\n"
-    
+
     return section_name, html_section
 
 
 def convert_directory_csvs_to_html(
-    directory_path: str,
-    output_file: str = "run_results.html"
+    directory_path: str, output_file: str = "run_results.html"
 ) -> Optional[str]:
     """Convert all CSV files in a directory to a single HTML file.
 
@@ -77,20 +76,20 @@ def convert_directory_csvs_to_html(
     # Validate input
     if not os.path.exists(directory_path):
         raise FileNotFoundError(f"Directory not found: {directory_path}")
-    
+
     if not os.path.isdir(directory_path):
         raise NotADirectoryError(f"Path is not a directory: {directory_path}")
 
     # Find all CSV files
     csv_files = find_csv_files(directory_path)
-    
+
     if not csv_files:
         logger.warning(f"No CSV files found in directory: {directory_path}")
         print(f"⚠️  No CSV files found in {directory_path}")
         return None
 
     print(f"📊 Found {len(csv_files)} CSV file(s) to process")
-    
+
     # Process each CSV file and combine HTML
     full_html_content = ""
     for csv_file in csv_files:
@@ -104,18 +103,22 @@ def convert_directory_csvs_to_html(
             print(f"  ✗ Failed to convert {os.path.basename(csv_file)}: {e}")
 
     # Write combined HTML to output file
-    output_path = os.path.join(directory_path, output_file) if directory_path != "." else output_file
-    
-    with open(output_path, 'w', encoding='utf-8') as html_file:
+    output_path = (
+        os.path.join(directory_path, output_file)
+        if directory_path != "."
+        else output_file
+    )
+
+    with open(output_path, "w", encoding="utf-8") as html_file:
         html_file.write(full_html_content)
-    
+
     logger.info(f"Generated HTML report: {output_path}")
     return output_path
 
 
 class ConvertCsvToEmail:
     """Handler class for CSV to email-ready HTML conversion command.
-    
+
     This class provides a command-line interface wrapper for converting
     multiple CSV files in a directory to a consolidated HTML report.
     """
@@ -131,13 +134,13 @@ def __init__(self, args: argparse.Namespace):
 
     def run(self) -> bool:
         """Execute the CSV to email HTML conversion.
-        
+
         Returns:
             True if conversion was successful, False otherwise.
         """
-        directory_path = getattr(self.args, 'csv_file_path', '.') or '.'
-        output_file = getattr(self.args, 'output_file', 'run_results.html')
-        
+        directory_path = getattr(self.args, "csv_file_path", ".") or "."
+        output_file = getattr(self.args, "output_file", "run_results.html")
+
         print("\n" + "=" * 80)
         print("📧 CONVERTING CSV FILES TO EMAIL REPORT")
         print("=" * 80)
@@ -145,13 +148,13 @@ def run(self) -> bool:
 
         try:
             output_path = convert_directory_csvs_to_html(directory_path, output_file)
-            
+
             if output_path:
                 print(f"📄 Output file: {output_path}")
                 print("✅ Email report generated successfully")
             else:
                 print("ℹ️  No files to process")
-                
+
             print("=" * 80 + "\n")
             self.return_status = True
         except (FileNotFoundError, NotADirectoryError) as e:
@@ -165,4 +168,3 @@ def run(self) -> bool:
             self.return_status = False
 
         return self.return_status
-
diff --git a/src/madengine/reporting/csv_to_html.py b/src/madengine/reporting/csv_to_html.py
index baf7a027..6c23695d 100644
--- a/src/madengine/reporting/csv_to_html.py
+++ b/src/madengine/reporting/csv_to_html.py
@@ -6,9 +6,9 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-import os
 import argparse
 import logging
+import os
 from typing import Optional
 
 import pandas as pd
@@ -17,9 +17,7 @@
 
 
 def convert_csv_to_html(
-    file_path: str,
-    output_path: Optional[str] = None,
-    include_index: bool = False
+    file_path: str, output_path: Optional[str] = None, include_index: bool = False
 ) -> str:
     """Convert a CSV file to an HTML file.
 
@@ -39,8 +37,8 @@ def convert_csv_to_html(
     # Validate input
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"CSV file not found: {file_path}")
-    
-    if not file_path.endswith('.csv'):
+
+    if not file_path.endswith(".csv"):
         raise ValueError(f"File must be a CSV file: {file_path}")
 
     # Determine output path
@@ -48,8 +46,12 @@ def convert_csv_to_html(
         base_path = os.path.dirname(file_path)
         base_name = os.path.basename(file_path)
         file_name = os.path.splitext(base_name)[0]
-        
-        output_path = os.path.join(base_path, f"{file_name}.html") if base_path else f"{file_name}.html"
+
+        output_path = (
+            os.path.join(base_path, f"{file_name}.html")
+            if base_path
+            else f"{file_name}.html"
+        )
 
     # Read CSV file
     logger.info(f"Reading CSV file: {file_path}")
@@ -63,6 +65,7 @@ def convert_csv_to_html(
     file_name = os.path.splitext(os.path.basename(file_path))[0]
     try:
         from madengine.utils.log_formatting import print_dataframe_beautiful
+
         print_dataframe_beautiful(df, f"Converting CSV: {file_name}")
     except ImportError:
         # Fallback to basic formatting if utils not available
@@ -74,9 +77,9 @@ def convert_csv_to_html(
     # Convert DataFrame to HTML
     logger.info(f"Converting to HTML: {output_path}")
     df_html = df.to_html(index=include_index)
-    
+
     # Write HTML file
-    with open(output_path, 'w', encoding='utf-8') as html_file:
+    with open(output_path, "w", encoding="utf-8") as html_file:
         html_file.write(df_html)
 
     logger.info(f"✅ Successfully converted {file_path} to {output_path}")
@@ -85,7 +88,7 @@ def convert_csv_to_html(
 
 class ConvertCsvToHtml:
     """Handler class for CSV to HTML conversion command.
-    
+
     This class provides a command-line interface wrapper for converting
     CSV files to HTML format.
     """
@@ -101,12 +104,12 @@ def __init__(self, args: argparse.Namespace):
 
     def run(self) -> bool:
         """Execute the CSV to HTML conversion.
-        
+
         Returns:
             True if conversion was successful, False otherwise.
         """
         file_path = self.args.csv_file_path
-        
+
         print("\n" + "=" * 80)
         print("🔄 CONVERTING CSV TO HTML REPORT")
         print("=" * 80)
@@ -133,4 +136,3 @@ def run(self) -> bool:
             self.return_status = False
 
         return self.return_status
-
diff --git a/src/madengine/reporting/update_perf_csv.py b/src/madengine/reporting/update_perf_csv.py
index f298efa2..68aff238 100644
--- a/src/madengine/reporting/update_perf_csv.py
+++ b/src/madengine/reporting/update_perf_csv.py
@@ -111,10 +111,12 @@ def handle_multiple_results(
 
     # Check that the multiple results CSV has the following required columns:
     # model, performance, metric
-    headings = ['model', 'performance', 'metric']
+    headings = ["model", "performance", "metric"]
     for heading in headings:
-        if not(heading in multiple_results_header):
-            raise RuntimeError(multiple_results + " file is missing the " + heading + " column")
+        if not (heading in multiple_results_header):
+            raise RuntimeError(
+                multiple_results + " file is missing the " + heading + " column"
+            )
 
     common_info_json = read_json(common_info)
     flatten_tags(common_info_json)
@@ -125,7 +127,7 @@ def handle_multiple_results(
         row = common_info_json.copy()
         model = r.pop("model")
         row["model"] = model_name + "_" + str(model)
-        
+
         # Extract all columns from CSV result to ensure proper column alignment
         # This ensures all result columns (benchmark, tp, inp, out, dtype, etc.) are captured
         for key, value in r.items():
@@ -140,7 +142,7 @@ def handle_multiple_results(
         for key, value in row.items():
             if isinstance(value, (list, tuple)):
                 row[key] = ",".join(str(v) for v in value)
-        
+
         # Create a single-row DataFrame from the row dict
         row_df = pd.DataFrame([row])
         final_multiple_results_df = pd.concat(
@@ -152,24 +154,28 @@ def handle_multiple_results(
         desired_columns = perf_csv_df.columns.tolist()
         # Add any additional columns from final_multiple_results_df
         desired_columns = desired_columns + [
-            col for col in final_multiple_results_df.columns if col not in desired_columns
+            col
+            for col in final_multiple_results_df.columns
+            if col not in desired_columns
         ]
         # Only select columns that actually exist in final_multiple_results_df to avoid KeyError
-        available_columns = [col for col in desired_columns if col in final_multiple_results_df.columns]
+        available_columns = [
+            col for col in desired_columns if col in final_multiple_results_df.columns
+        ]
         final_multiple_results_df = final_multiple_results_df[available_columns]
 
     perf_entry_df_to_csv(final_multiple_results_df)
-    
+
     # Also save as JSON for consistency with single result workflow
     # This ensures perf_entry.json is always up-to-date regardless of result type
-    perf_entry_list = final_multiple_results_df.to_dict(orient='records')
+    perf_entry_list = final_multiple_results_df.to_dict(orient="records")
     with open("perf_entry.json", "w") as f:
         # If multiple entries, save as array; if single, save as object for consistency
         if len(perf_entry_list) == 1:
             json.dump(perf_entry_list[0], f, indent=2)
         else:
             json.dump(perf_entry_list, f, indent=2)
-    
+
     if perf_csv_df.empty:
         perf_csv_df = final_multiple_results_df
     else:
diff --git a/src/madengine/reporting/update_perf_super.py b/src/madengine/reporting/update_perf_super.py
index f0b1753c..f4bd9433 100644
--- a/src/madengine/reporting/update_perf_super.py
+++ b/src/madengine/reporting/update_perf_super.py
@@ -11,48 +11,50 @@
 import json
 import os
 import typing
+
 # third-party imports
 import pandas as pd
+
 # MAD Engine imports
 from madengine.utils.config_parser import ConfigParser
 
 
 def read_json(js: str) -> typing.Union[dict, list]:
     """Read a JSON file.
-    
+
     Args:
         js: The path to the JSON file.
-    
+
     Returns:
         The JSON dictionary or list.
     """
-    with open(js, 'r') as f:
+    with open(js, "r") as f:
         return json.load(f)
 
 
 def write_json(data: typing.Union[dict, list], output_path: str) -> None:
     """Write data to a JSON file.
-    
+
     Args:
         data: The data to write (dict or list).
         output_path: The path to the output JSON file.
     """
-    with open(output_path, 'w') as f:
+    with open(output_path, "w") as f:
         json.dump(data, f, indent=2)
 
 
 def load_perf_super_json(perf_super_json: str) -> list:
     """Load existing perf_super.json file (cumulative).
-    
+
     Args:
         perf_super_json: Path to perf_super.json file.
-    
+
     Returns:
         List of performance records, or empty list if file doesn't exist.
     """
     if not os.path.exists(perf_super_json):
         return []
-    
+
     try:
         data = read_json(perf_super_json)
         # Ensure it's a list
@@ -66,85 +68,85 @@ def load_perf_super_json(perf_super_json: str) -> list:
 
 
 def handle_multiple_results_super(
-        perf_super_list: list,
-        multiple_results: str,
-        common_info: str,
-        model_name: str,
-        config_parser: ConfigParser
-    ) -> list:
+    perf_super_list: list,
+    multiple_results: str,
+    common_info: str,
+    model_name: str,
+    config_parser: ConfigParser,
+) -> list:
     """Handle multiple results with config matching.
-    
+
     Args:
         perf_super_list: List of existing performance records.
         multiple_results: The path to the multiple results CSV file.
         common_info: The path to the common info JSON file.
         model_name: The model name.
         config_parser: ConfigParser instance for loading configs.
-        
+
     Returns:
         Updated list of performance records with configs.
     """
     # Load multiple results CSV
     multiple_results_df = pd.read_csv(multiple_results)
     multiple_results_df.columns = multiple_results_df.columns.str.strip()
-    
+
     # Check required columns
-    required_cols = ['model', 'performance', 'metric']
+    required_cols = ["model", "performance", "metric"]
     for col in required_cols:
         if col not in multiple_results_df.columns:
             raise RuntimeError(f"{multiple_results} file is missing the {col} column")
-    
+
     # Load common info
     common_info_json = read_json(common_info)
-    
+
     # Parse config file from args if present
     configs_data = None
-    if 'args' in common_info_json and common_info_json['args']:
+    if "args" in common_info_json and common_info_json["args"]:
         # model_scripts_path: use None so resolution relies on config_parser.scripts_base_dir
         # (callers pass scripts_base_dir when creating the parser; 'pipeline' is not a path)
-        configs_data = config_parser.parse_and_load(
-            common_info_json['args'],
-            None
-        )
-    
+        configs_data = config_parser.parse_and_load(common_info_json["args"], None)
+
     # Process each result row
     for result_row in multiple_results_df.to_dict(orient="records"):
         record = common_info_json.copy()
-        
+
         # Update model name
         result_model = result_row.pop("model")
         record["model"] = f"{model_name}_{result_model}"
-        
+
         # Extract standard performance/metric columns
         record["performance"] = result_row.pop("performance")
         record["metric"] = result_row.pop("metric")
         # test_duration for Duration column in reports (avoid N/A when CSV has it)
         _td = result_row.pop("test_duration", "")
-        record["test_duration"] = "" if (_td is None or _td == "" or pd.isna(_td)) else str(_td)
+        record["test_duration"] = (
+            "" if (_td is None or _td == "" or pd.isna(_td)) else str(_td)
+        )
 
         # Put remaining metrics into multi_results
         # Exclude internal fields that shouldn't be in multi_results
-        extra_metrics = {k: v for k, v in result_row.items() 
-                         if k not in ["status"] and pd.notna(v)}
+        extra_metrics = {
+            k: v for k, v in result_row.items() if k not in ["status"] and pd.notna(v)
+        }
         if extra_metrics:
             record["multi_results"] = extra_metrics
         else:
             record["multi_results"] = None
-        
+
         # Set status based on performance
-        if record.get("performance") is not None and pd.notna(record.get("performance")):
+        if record.get("performance") is not None and pd.notna(
+            record.get("performance")
+        ):
             record["status"] = "SUCCESS"
         else:
             record["status"] = "FAILURE"
-        
+
         # Match config to this specific result
         if configs_data:
             if isinstance(configs_data, list):
                 # For CSV configs with multiple rows, try to match
                 matched_config = config_parser.match_config_to_result(
-                    configs_data,
-                    result_row,
-                    result_model
+                    configs_data, result_row, result_model
                 )
                 record["configs"] = matched_config
             else:
@@ -152,77 +154,71 @@ def handle_multiple_results_super(
                 record["configs"] = configs_data
         else:
             record["configs"] = None
-        
+
         perf_super_list.append(record)
-    
+
     return perf_super_list
 
 
-def handle_single_result_super(
-        perf_super_list: list,
-        single_result: str
-    ) -> list:
+def handle_single_result_super(perf_super_list: list, single_result: str) -> list:
     """Handle a single result.
-    
+
     Args:
         perf_super_list: List of existing performance records.
         single_result: The path to the single result JSON file.
-    
+
     Returns:
         Updated list of performance records.
     """
     single_result_json = read_json(single_result)
-    
+
     # Ensure configs field exists (may be None)
     if "configs" not in single_result_json:
         single_result_json["configs"] = None
-    
+
     # Ensure multi_results field exists (may be None)
     if "multi_results" not in single_result_json:
         single_result_json["multi_results"] = None
-    
+
     perf_super_list.append(single_result_json)
     return perf_super_list
 
 
-def handle_exception_result_super(
-        perf_super_list: list,
-        exception_result: str
-    ) -> list:
+def handle_exception_result_super(perf_super_list: list, exception_result: str) -> list:
     """Handle an exception result.
-    
+
     Args:
         perf_super_list: List of existing performance records.
         exception_result: The path to the exception result JSON file.
-    
+
     Returns:
         Updated list of performance records.
     """
     exception_result_json = read_json(exception_result)
-    
+
     # Ensure configs field exists (may be None)
     if "configs" not in exception_result_json:
         exception_result_json["configs"] = None
-    
+
     # Ensure multi_results field exists (may be None)
     if "multi_results" not in exception_result_json:
         exception_result_json["multi_results"] = None
-    
+
     perf_super_list.append(exception_result_json)
     return perf_super_list
 
 
 def update_perf_super_json(
-        perf_super_json: str,
-        multiple_results: typing.Optional[str] = None,
-        single_result: typing.Optional[str] = None,
-        exception_result: typing.Optional[str] = None,
-        common_info: typing.Optional[str] = None,
-        model_name: typing.Optional[str] = None,
-        scripts_base_dir: typing.Optional[str] = None,
-    ) -> int:
+    perf_super_json: str,
+    multiple_results: typing.Optional[str] = None,
+    single_result: typing.Optional[str] = None,
+    exception_result: typing.Optional[str] = None,
+    common_info: typing.Optional[str] = None,
+    model_name: typing.Optional[str] = None,
+    scripts_base_dir: typing.Optional[str] = None,
+) -> int:
     """Update the perf_super.json file (cumulative) with the latest performance data.
-    
+
     Args:
         perf_super_json: Path to perf_super.json file (cumulative).
         multiple_results: Path to multiple results CSV file.
@@ -231,7 +227,7 @@ def update_perf_super_json(
         common_info: Path to common info JSON file.
         model_name: The model name.
         scripts_base_dir: Base directory for scripts (for config file resolution).
-        
+
     Returns:
         Number of entries added in this update.
     """
@@ -239,14 +235,14 @@ def update_perf_super_json(
     print("📊 UPDATING PERFORMANCE SUPERSET DATABASE")
     print("=" * 80)
     print(f"📂 Target file: {perf_super_json}")
-    
+
     # Load existing perf_super.json
     perf_super_list = load_perf_super_json(perf_super_json)
     initial_count = len(perf_super_list)
-    
+
     # Create config parser
     config_parser = ConfigParser(scripts_base_dir=scripts_base_dir)
-    
+
     # Handle different result types
     if multiple_results:
         print("🔄 Processing multiple results with configs...")
@@ -268,23 +264,23 @@ def update_perf_super_json(
     else:
         print("ℹ️  No results to update in perf_super.json")
         return 0
-    
+
     # Write updated perf_super.json
     write_json(perf_super_list, perf_super_json)
     entries_added = len(perf_super_list) - initial_count
     print(f"✅ Successfully updated: {perf_super_json} (added {entries_added} entries)")
     print("=" * 80 + "\n")
-    
+
     return entries_added
 
 
 def generate_perf_entry_super_json(
     perf_super_json: str = "perf_super.json",
     perf_entry_super_json: str = "perf_entry_super.json",
-    num_entries: int = 1
+    num_entries: int = 1,
 ) -> None:
     """Generate perf_entry_super.json (latest entries) from perf_super.json (cumulative).
-    
+
     Args:
         perf_super_json: Path to cumulative JSON source
         perf_entry_super_json: Path to entry JSON output (latest entries only)
@@ -293,31 +289,33 @@ def generate_perf_entry_super_json(
     if not os.path.exists(perf_super_json):
         print(f"⚠️  {perf_super_json} not found, skipping entry JSON generation")
         return
-    
+
     data = read_json(perf_super_json)
     if not isinstance(data, list):
         data = [data]
-    
+
     if not data:
         print(f"⚠️  {perf_super_json} is empty, skipping entry JSON generation")
         return
-    
+
     # Take the latest num_entries entries
     entry_data = data[-num_entries:] if num_entries > 0 else [data[-1]]
-    
+
     # Write to perf_entry_super.json
     write_json(entry_data, perf_entry_super_json)
-    print(f"✅ Generated entry JSON: {perf_entry_super_json} ({len(entry_data)} entries)")
+    print(
+        f"✅ Generated entry JSON: {perf_entry_super_json} ({len(entry_data)} entries)"
+    )
 
 
 def convert_super_json_to_csv(
     perf_super_json: str,
     output_csv: str,
     entry_only: bool = False,
-    num_entries: int = 1
+    num_entries: int = 1,
 ) -> None:
     """Convert JSON to CSV format.
-    
+
     Args:
         perf_super_json: Path to JSON source
         output_csv: Output CSV path
@@ -328,33 +326,33 @@ def convert_super_json_to_csv(
     if not os.path.exists(perf_super_json):
         print(f"⚠️  {perf_super_json} not found, skipping CSV generation")
         return
-    
+
     data = read_json(perf_super_json)
     if not isinstance(data, list):
         data = [data]
-    
+
     if not data:
         print(f"⚠️  {perf_super_json} is empty, skipping CSV generation")
         return
-    
+
     if entry_only and data:
         # Take the latest num_entries entries
         data = data[-num_entries:] if num_entries > 0 else [data[-1]]
-    
+
     # Convert to DataFrame
     df = pd.DataFrame(data)
-    
+
     # Serialize complex fields to JSON strings
-    if 'configs' in df.columns:
-        df['configs'] = df['configs'].apply(
+    if "configs" in df.columns:
+        df["configs"] = df["configs"].apply(
             lambda x: json.dumps(x) if x is not None else None
         )
-    
-    if 'multi_results' in df.columns:
-        df['multi_results'] = df['multi_results'].apply(
+
+    if "multi_results" in df.columns:
+        df["multi_results"] = df["multi_results"].apply(
             lambda x: json.dumps(x) if x is not None else None
         )
-    
+
     # Write to CSV
     df.to_csv(output_csv, index=False)
     print(f"✅ Generated CSV: {output_csv} ({len(df)} entries)")
@@ -363,10 +361,10 @@ def convert_super_json_to_csv(
 def update_perf_super_csv(
     perf_super_json: str = "perf_super.json",
     perf_super_csv: str = "perf_super.csv",
-    num_entries: int = 1
+    num_entries: int = 1,
 ) -> None:
     """Generate perf_entry_super.json, perf_entry_super.csv and perf_super.csv from perf_super.json.
-    
+
     Args:
         perf_super_json: Path to cumulative JSON source (perf_super.json)
         perf_super_csv: Path to cumulative CSV (perf_super.csv)
@@ -375,27 +373,22 @@ def update_perf_super_csv(
     print("\n" + "=" * 80)
     print("📄 GENERATING FILES FROM PERFORMANCE SUPERSET")
     print("=" * 80)
-    
+
     # Generate perf_entry_super.json (latest entries from current run)
     generate_perf_entry_super_json(
         perf_super_json=perf_super_json,
         perf_entry_super_json="perf_entry_super.json",
-        num_entries=num_entries
+        num_entries=num_entries,
     )
-    
+
     # Generate perf_entry_super.csv (latest entries from current run)
     convert_super_json_to_csv(
         "perf_entry_super.json",  # Use the entry JSON as source
         "perf_entry_super.csv",
-        entry_only=False  # Read all from entry JSON (already filtered)
+        entry_only=False,  # Read all from entry JSON (already filtered)
     )
-    
+
     # Generate perf_super.csv (all entries)
-    convert_super_json_to_csv(
-        perf_super_json,
-        perf_super_csv,
-        entry_only=False
-    )
-    
-    print("=" * 80 + "\n")
+    convert_super_json_to_csv(perf_super_json, perf_super_csv, entry_only=False)
 
+    print("=" * 80 + "\n")
diff --git a/src/madengine/scripts/common/post_scripts/gpu_info_post.sh b/src/madengine/scripts/common/post_scripts/gpu_info_post.sh
index 337a9550..04664d1d 100644
--- a/src/madengine/scripts/common/post_scripts/gpu_info_post.sh
+++ b/src/madengine/scripts/common/post_scripts/gpu_info_post.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 set -x
 
@@ -38,7 +38,7 @@ if [ ! -f "$OUTPUT" ]; then
     echo "⚠️  Warning: $OUTPUT not found in $(pwd)"
     echo "⚠️  This may be expected if multiple gpu_info tools are stacked together"
     echo "⚠️  and only one ran successfully. Checking for any profiler outputs..."
-    
+
     # Check if prof.csv exists (default output name)
     if [ -f "prof.csv" ]; then
         echo "Found prof.csv - renaming to $OUTPUT"
@@ -47,11 +47,11 @@ if [ ! -f "$OUTPUT" ]; then
         echo "Profiler output saved to: $(pwd)/${OUTPUT}"
         exit 0
     fi
-    
+
     # List all CSV files for debugging
     echo "Available CSV files in directory:"
     ls -la *.csv 2>/dev/null || echo "No CSV files found"
-    
+
     # Don't fail - just warn and exit successfully
     # This allows other stacked tools to complete their post-scripts
     echo "⚠️  Profiler output $OUTPUT not found - skipping (non-fatal)"
diff --git a/src/madengine/scripts/common/post_scripts/gpu_info_power_stop.sh b/src/madengine/scripts/common/post_scripts/gpu_info_power_stop.sh
index 051eb9a7..3b456fd9 100755
--- a/src/madengine/scripts/common/post_scripts/gpu_info_power_stop.sh
+++ b/src/madengine/scripts/common/post_scripts/gpu_info_power_stop.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 # Stop gpu_info_power_profiler and collect output
 
 set -x
@@ -32,23 +32,23 @@ if ! kill -0 "$PROFILER_PID" 2>/dev/null; then
     echo "⚠️  Warning: Power profiler process (PID: $PROFILER_PID) is not running"
 else
     echo "Sending termination signal to power profiler (PID: $PROFILER_PID)..."
-    
+
     # Send SIGTERM to gracefully stop the profiler
     kill -TERM "$PROFILER_PID" 2>/dev/null || true
-    
+
     # Wait for profiler to finish writing output (max 10 seconds)
     WAIT_COUNT=0
     while kill -0 "$PROFILER_PID" 2>/dev/null && [ $WAIT_COUNT -lt 20 ]; do
         sleep 0.5
         WAIT_COUNT=$((WAIT_COUNT + 1))
     done
-    
+
     # Force kill if still running
     if kill -0 "$PROFILER_PID" 2>/dev/null; then
         echo "⚠️  Profiler did not stop gracefully, force killing..."
         kill -9 "$PROFILER_PID" 2>/dev/null || true
     fi
-    
+
     echo "✓ GPU power profiler stopped"
 fi
 
@@ -63,4 +63,3 @@ if [ -f "/tmp/gpu_info_power_profiler.log" ]; then
     tail -20 /tmp/gpu_info_power_profiler.log || true
     echo "=========================="
 fi
-
diff --git a/src/madengine/scripts/common/post_scripts/gpu_info_vram_stop.sh b/src/madengine/scripts/common/post_scripts/gpu_info_vram_stop.sh
index 221a283a..3ad91d8c 100755
--- a/src/madengine/scripts/common/post_scripts/gpu_info_vram_stop.sh
+++ b/src/madengine/scripts/common/post_scripts/gpu_info_vram_stop.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 # Stop gpu_info_vram_profiler and collect output
 
 set -x
@@ -32,23 +32,23 @@ if ! kill -0 "$PROFILER_PID" 2>/dev/null; then
     echo "⚠️  Warning: VRAM profiler process (PID: $PROFILER_PID) is not running"
 else
     echo "Sending termination signal to VRAM profiler (PID: $PROFILER_PID)..."
-    
+
     # Send SIGTERM to gracefully stop the profiler
     kill -TERM "$PROFILER_PID" 2>/dev/null || true
-    
+
     # Wait for profiler to finish writing output (max 10 seconds)
     WAIT_COUNT=0
     while kill -0 "$PROFILER_PID" 2>/dev/null && [ $WAIT_COUNT -lt 20 ]; do
         sleep 0.5
         WAIT_COUNT=$((WAIT_COUNT + 1))
     done
-    
+
     # Force kill if still running
     if kill -0 "$PROFILER_PID" 2>/dev/null; then
         echo "⚠️  Profiler did not stop gracefully, force killing..."
         kill -9 "$PROFILER_PID" 2>/dev/null || true
     fi
-    
+
     echo "✓ GPU VRAM profiler stopped"
 fi
 
@@ -63,4 +63,3 @@ if [ -f "/tmp/gpu_info_vram_profiler.log" ]; then
     tail -20 /tmp/gpu_info_vram_profiler.log || true
     echo "=========================="
 fi
-
diff --git a/src/madengine/scripts/common/post_scripts/post_test.sh b/src/madengine/scripts/common/post_scripts/post_test.sh
index 424c012f..31325c53 100644
--- a/src/madengine/scripts/common/post_scripts/post_test.sh
+++ b/src/madengine/scripts/common/post_scripts/post_test.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 version=${1:-0}
 echo "Post-Script test called $version"
diff --git a/src/madengine/scripts/common/post_scripts/trace.sh b/src/madengine/scripts/common/post_scripts/trace.sh
index 1e489861..ef5cc185 100644
--- a/src/madengine/scripts/common/post_scripts/trace.sh
+++ b/src/madengine/scripts/common/post_scripts/trace.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 set -e
 set -x
@@ -28,7 +28,7 @@ rpd)
 		# Still create output directory and copy what we can
 		touch "$OUTPUT/trace.rpd"  # Create empty file so test can find directory structure
 	fi
-	
+
 	echo "RPD post-script: Checking for rpd2tracing.py script..."
 	if [ -f "./rocmProfileData/tools/rpd2tracing.py" ]; then
 		echo "RPD post-script: rpd2tracing.py found"
@@ -38,7 +38,7 @@ rpd)
 		else
 			echo "RPD post-script: Skipping rpd2tracing.py because trace.rpd is missing or empty"
 			# Create empty files so the directory structure exists
-			touch "$OUTPUT/trace.rpd"  
+			touch "$OUTPUT/trace.rpd"
 			touch "$OUTPUT/trace.json"
 		fi
 	else
@@ -49,14 +49,14 @@ rpd)
 		touch "$OUTPUT/trace.rpd"
 		touch "$OUTPUT/trace.json"
 	fi
-	
+
 	cp -vLR --preserve=all "$OUTPUT" "$SAVESPACE"
 	;;
 
 rocprof)
 	# Handle both legacy rocprof (results*) and rocprofv3 (different output format)
 	echo "ROCprof post-script: Collecting profiling output..."
-	
+
 	# Check for legacy rocprof results files
 	if ls results* 1> /dev/null 2>&1; then
 		echo "Found rocprof results files"
@@ -64,7 +64,7 @@ rocprof)
 	else
 		echo "No rocprof results* files found (may be using rocprofv3)"
 	fi
-	
+
 	# Check for rocprofv3 output directories (UUID pattern like 1e4d92661463/)
 	# rocprofv3 creates directories with hex UUIDs containing .db files
 	found_rocprofv3_output=false
@@ -79,18 +79,18 @@ rocprof)
 			fi
 		fi
 	done
-	
+
 	# Also check for other rocprofv3 output patterns
 	if ls rocprofv3-* 1> /dev/null 2>&1; then
 		echo "Found rocprofv3-* files"
 		mv rocprofv3-* "$OUTPUT" 2>/dev/null || true
 		found_rocprofv3_output=true
 	fi
-	
+
 	if [ "$found_rocprofv3_output" = true ]; then
 		echo "Collected rocprofv3 profiling data"
 	fi
-	
+
 	# Check for CSV trace files in subdirectories (rocprof can create hostname subdirectories)
 	# Look for patterns like: hostname/pid_kernel_trace.csv, hostname/pid_hip_api_trace.csv, etc.
 	csv_found=false
@@ -106,11 +106,11 @@ rocprof)
 			fi
 		fi
 	done
-	
+
 	if [ "$csv_found" = true ]; then
 		echo "Collected rocprof CSV trace files from subdirectories"
 	fi
-	
+
 	# Consolidate rocprofv3 CSV files so MAD-agent finds rocprofv3_output_* names.
 	# rocprofv3 may write agent_info in -o prefix but kernel_trace/stats with PID prefix or under hostname/pid.
 	for base in agent_info domain_stats kernel_stats kernel_trace hip_api_trace counter_collection; do
@@ -123,7 +123,7 @@ rocprof)
 			cp -v "$first" "$canonical"
 		fi
 	done
-	
+
 	# Generate instruction_histogram.json from counter/domain_stats CSV so MAD-agent gets real instruction mix.
 	if [ -f "${OUTPUT}/rocprofv3_output_counter_collection.csv" ] || [ -f "${OUTPUT}/rocprofv3_output_domain_stats.csv" ]; then
 		CONVERTER="$(cd "$(dirname "$0")/../tools" 2>/dev/null && pwd)/rocprof_counter_csv_to_instruction_histogram.py"
diff --git a/src/madengine/scripts/common/pre_scripts/gpu_info_power_start.sh b/src/madengine/scripts/common/pre_scripts/gpu_info_power_start.sh
index d28c5763..ce2bf7d1 100755
--- a/src/madengine/scripts/common/pre_scripts/gpu_info_power_start.sh
+++ b/src/madengine/scripts/common/pre_scripts/gpu_info_power_start.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 # Start gpu_info_power_profiler in background mode
 
 set -x
@@ -60,4 +60,3 @@ sleep 2
 touch "$PROFILER_START_FILE"
 
 echo "✓ GPU power profiler initialization complete"
-
diff --git a/src/madengine/scripts/common/pre_scripts/gpu_info_pre.sh b/src/madengine/scripts/common/pre_scripts/gpu_info_pre.sh
index 60bd60a0..1b56aecf 100644
--- a/src/madengine/scripts/common/pre_scripts/gpu_info_pre.sh
+++ b/src/madengine/scripts/common/pre_scripts/gpu_info_pre.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 gpu_vendor=""
 if [ -f "/usr/bin/nvidia-smi" ]; then
diff --git a/src/madengine/scripts/common/pre_scripts/gpu_info_vram_start.sh b/src/madengine/scripts/common/pre_scripts/gpu_info_vram_start.sh
index 2ae8e83d..423ba822 100755
--- a/src/madengine/scripts/common/pre_scripts/gpu_info_vram_start.sh
+++ b/src/madengine/scripts/common/pre_scripts/gpu_info_vram_start.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 # Start gpu_info_vram_profiler in background mode
 
 set -x
@@ -60,4 +60,3 @@ sleep 2
 touch "$PROFILER_START_FILE"
 
 echo "✓ GPU VRAM profiler initialization complete"
-
diff --git a/src/madengine/scripts/common/pre_scripts/miopen_build_latest.sh b/src/madengine/scripts/common/pre_scripts/miopen_build_latest.sh
index 5a9b4714..cf7f02ca 100644
--- a/src/madengine/scripts/common/pre_scripts/miopen_build_latest.sh
+++ b/src/madengine/scripts/common/pre_scripts/miopen_build_latest.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 set -e
 set -x
diff --git a/src/madengine/scripts/common/pre_scripts/pre_test.sh b/src/madengine/scripts/common/pre_scripts/pre_test.sh
index 68849453..e87f429c 100644
--- a/src/madengine/scripts/common/pre_scripts/pre_test.sh
+++ b/src/madengine/scripts/common/pre_scripts/pre_test.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 version=${1:-0}
 echo "Pre-Script test called $version"
diff --git a/src/madengine/scripts/common/pre_scripts/rocEnvTool/console.py b/src/madengine/scripts/common/pre_scripts/rocEnvTool/console.py
index b91da1a2..b4aaf76a 100644
--- a/src/madengine/scripts/common/pre_scripts/rocEnvTool/console.py
+++ b/src/madengine/scripts/common/pre_scripts/rocEnvTool/console.py
@@ -5,36 +5,56 @@
 """
 import subprocess
 
+
 class Console:
-  """ Console class
-  class to run console commands
-  """
-  def __init__(self, shellVerbose=True, live_output=False):
-      self.shellVerbose = shellVerbose
-      self.live_output = live_output
+    """Console class
+    class to run console commands
+    """
+
+    def __init__(self, shellVerbose=True, live_output=False):
+        self.shellVerbose = shellVerbose
+        self.live_output = live_output
 
-  def sh(self, command, canFail=False, timeout=60, secret=False, prefix=""):
-      if self.shellVerbose and not secret:
-          print("> " + command, flush=True)
-      proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, universal_newlines=True, bufsize=1)
-      try:
-          if not self.live_output: 
-              outs, errs = proc.communicate(timeout=timeout)
-          else:
-              outs = []
-              for stdout_line in iter(proc.stdout.readline, ""):
-                  print(prefix+stdout_line, end="" )
-                  outs.append(stdout_line )
-              outs = ''.join(outs)
-              proc.stdout.close()
-              proc.wait(timeout=timeout) 
-      except subprocess.TimeoutExpired as exc:
-          proc.kill()
-          raise RuntimeError('Console script timeout') from exc
-      if proc.returncode != 0:
-          if not canFail:
-            if not secret:
-                raise RuntimeError("Subprocess '" + command + "' failed with exit code " + str(proc.returncode) )
+    def sh(self, command, canFail=False, timeout=60, secret=False, prefix=""):
+        if self.shellVerbose and not secret:
+            print("> " + command, flush=True)
+        proc = subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            shell=True,
+            universal_newlines=True,
+            bufsize=1,
+        )
+        try:
+            if not self.live_output:
+                outs, errs = proc.communicate(timeout=timeout)
             else:
-                raise RuntimeError("Subprocess '" + secret + "' failed with exit code " + str(proc.returncode) )
-      return outs.strip()
+                outs = []
+                for stdout_line in iter(proc.stdout.readline, ""):
+                    print(prefix + stdout_line, end="")
+                    outs.append(stdout_line)
+                outs = "".join(outs)
+                proc.stdout.close()
+                proc.wait(timeout=timeout)
+        except subprocess.TimeoutExpired as exc:
+            proc.kill()
+            raise RuntimeError("Console script timeout") from exc
+        if proc.returncode != 0:
+            if not canFail:
+                if not secret:
+                    raise RuntimeError(
+                        "Subprocess '"
+                        + command
+                        + "' failed with exit code "
+                        + str(proc.returncode)
+                    )
+                else:
+                    raise RuntimeError(
+                        "Subprocess '"
+                        + secret
+                        + "' failed with exit code "
+                        + str(proc.returncode)
+                    )
+        return outs.strip()
diff --git a/src/madengine/scripts/common/pre_scripts/rocEnvTool/csv_parser.py b/src/madengine/scripts/common/pre_scripts/rocEnvTool/csv_parser.py
index 7c1599ab..8da270d8 100644
--- a/src/madengine/scripts/common/pre_scripts/rocEnvTool/csv_parser.py
+++ b/src/madengine/scripts/common/pre_scripts/rocEnvTool/csv_parser.py
@@ -2,12 +2,14 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+
 import os
 import shlex
 import shutil
+
 from console import Console
 
-'''
+"""
 CSV Parser - parses various sys config log files and dumps into CSV.
 Only the below tags are supported.
 Enable dumping it via adding --dump-csv in rocEnvTool
@@ -21,7 +23,9 @@
 rocm_env_variables
 pip_list
 numa_balancing
-'''
+"""
+
+
 class CSVParser:
     def __init__(self, filename, sys_config_files_path, tags, path_resolver=None):
         self.filename = filename
@@ -46,7 +50,7 @@ def determine_gpu_device_type(self):
         return gpu_device_type
 
     def get_log_file_data(self, log_file_path):
-        fs = open(log_file_path, 'r')
+        fs = open(log_file_path, "r")
         lines = fs.readlines()
         fs.close()
 
@@ -64,7 +68,7 @@ def dump_os_information_in_csv(self, os_info_path):
                 info_list.append("Node name|" + values[1])
                 info_list.append("Kernel version| " + values[2])
             if "PRETTY_NAME" in line:
-                info_list.append("OS version|" + line.split("=")[1].replace('"', ''))
+                info_list.append("OS version|" + line.split("=")[1].replace('"', ""))
         return info_list
 
     def dump_cpu_information_in_csv(self, cpu_log_path):
@@ -89,24 +93,24 @@ def dump_gpu_information_in_csv(self, gpu_log_path, device_type):
             num_gpu = 0
             for j in range(1, len(lines)):
                 line = lines[j].rstrip()
-                if ("Name:" in line and "gfx" in line):
+                if "Name:" in line and "gfx" in line:
                     name = line.split(":")[1].lstrip()
-                if ("Uuid:" in line):
+                if "Uuid:" in line:
                     uuid = line.split(":")[1].lstrip()
-                if ("Marketing Name:" in line):
+                if "Marketing Name:" in line:
                     marketing_name = line.split(":")[1].lstrip()
-                if ("Vendor Name:" in line):
+                if "Vendor Name:" in line:
                     vendor_name = line.split(":")[1].lstrip()
-                if ("Device Type:" in line):
+                if "Device Type:" in line:
                     device_type = line.split(":")[1].lstrip()
                     if device_type == "GPU":
                         break
 
             for j in range(1, len(lines)):
                 line = lines[j].rstrip()
-                if ("Device Type:" in line):
+                if "Device Type:" in line:
                     device_type = line.split(":")[1].lstrip()
-                    if (device_type == "GPU"):
+                    if device_type == "GPU":
                         num_gpu += 1
             info_list.append("Name|" + name)
             info_list.append("Uuid|" + uuid)
@@ -196,7 +200,7 @@ def dump_rocm_env_variables_in_csv(self, log_path):
         info_list.append(lines[0].rstrip())
         for j in range(1, len(lines)):
             env_values = lines[j].rstrip().split("=")
-            if (env_values[0]):
+            if env_values[0]:
                 info_list.append(env_values[0] + "|" + env_values[1])
         return info_list
 
@@ -256,7 +260,7 @@ def dump_cuda_env_variables_in_csv(self, log_path):
 
     def dump_csv_output(self):
         gpu_device_type = self.gpu_device_type
-        fs = open(self.filename, 'w')
+        fs = open(self.filename, "w")
         fs.write("sep=|")
         fs.write("\n")
         sys_config_info = []
@@ -270,23 +274,37 @@ def dump_csv_output(self):
                 if tag == "cpu_information":
                     sys_config_info.extend(self.dump_cpu_information_in_csv(log_path))
                 if tag == "gpu_information":
-                    sys_config_info.extend(self.dump_gpu_information_in_csv(log_path, gpu_device_type))
+                    sys_config_info.extend(
+                        self.dump_gpu_information_in_csv(log_path, gpu_device_type)
+                    )
                 if tag == "rocm_smi_gpudeviceid":
-                    sys_config_info.extend(self.dump_rocm_smi_gpudeviceid_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_rocm_smi_gpudeviceid_in_csv(log_path)
+                    )
                 if tag == "memory_information":
-                    sys_config_info.extend(self.dump_memory_information_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_memory_information_in_csv(log_path)
+                    )
                 if tag == "rocm_information":
                     sys_config_info.extend(self.dump_rocm_information_in_csv(log_path))
                 if tag == "rocm_packages_installed":
-                    sys_config_info.extend(self.dump_rocm_packages_installed_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_rocm_packages_installed_in_csv(log_path)
+                    )
                 if tag == "rocm_env_variables":
-                    sys_config_info.extend(self.dump_rocm_env_variables_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_rocm_env_variables_in_csv(log_path)
+                    )
                 if tag == "cuda_information":
                     sys_config_info.extend(self.dump_cuda_information_in_csv(log_path))
                 if tag == "cuda_packages_installed":
-                    sys_config_info.extend(self.dump_cuda_packages_installed_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_cuda_packages_installed_in_csv(log_path)
+                    )
                 if tag == "cuda_env_variables":
-                    sys_config_info.extend(self.dump_cuda_env_variables_in_csv(log_path))
+                    sys_config_info.extend(
+                        self.dump_cuda_env_variables_in_csv(log_path)
+                    )
                 if tag == "pip_list":
                     sys_config_info.extend(self.dump_pip_list_in_csv(log_path))
                 if tag == "numa_balancing":
@@ -298,14 +316,14 @@ def dump_csv_output(self):
             fs.write(sys_config_info[j])
             fs.write("\n")
         fs.close()
-        print("\n" + "="*60)
+        print("\n" + "=" * 60)
         print(f"✅ SUCCESS: System config data dumped to {self.filename}")
-        print("="*60 + "\n")
+        print("=" * 60 + "\n")
 
     def print_csv_output(self):
-        print("\n" + "="*80)
+        print("\n" + "=" * 80)
         print("📋 SYSTEM CONFIG INFO - ENVIRONMENT VARIABLES")
-        print("="*80)
+        print("=" * 80)
         if self.sys_config_info_list:
             for j in range(len(self.sys_config_info_list)):
                 line = self.sys_config_info_list[j]
@@ -317,4 +335,4 @@ def print_csv_output(self):
                     print(f"📌 {line}")
         else:
             print("❌ No system config information available")
-        print("="*80 + "\n")
+        print("=" * 80 + "\n")
diff --git a/src/madengine/scripts/common/pre_scripts/rocEnvTool/rocenv_tool.py b/src/madengine/scripts/common/pre_scripts/rocEnvTool/rocenv_tool.py
index b2288dea..da94d25b 100644
--- a/src/madengine/scripts/common/pre_scripts/rocEnvTool/rocenv_tool.py
+++ b/src/madengine/scripts/common/pre_scripts/rocEnvTool/rocenv_tool.py
@@ -2,6 +2,7 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+
 import os
 import sys
 import argparse
@@ -21,10 +22,11 @@
 
 
 class CommandInfo:
-    '''
-        section_info (str): Name of the section.
-        cmds (list) : command list for a particular section.
-    '''
+    """
+    section_info (str): Name of the section.
+    cmds (list) : command list for a particular section.
+    """
+
     def __init__(self, section_info, cmds):
         self.section_info = section_info
         self.cmds = cmds
@@ -34,53 +36,53 @@ class RocmPathResolver:
     """
     Detects and resolves ROCm installation paths for both TheRock and traditional installations.
     """
-    
+
     def __init__(self, verbose: bool = False):
         self.verbose = verbose
-        self.installation_type = 'unknown'
+        self.installation_type = "unknown"
         self.rocm_root = None
         self.paths = {
-            'rocminfo': None,
-            'rocm_smi': None,
-            'hipcc': None,
-            'amdclang': None,
-            'version_file': None,
-            'manifest_file': None,
+            "rocminfo": None,
+            "rocm_smi": None,
+            "hipcc": None,
+            "amdclang": None,
+            "version_file": None,
+            "manifest_file": None,
         }
         self.therock_details = {}
         self.detect()
-    
+
     def log(self, message: str):
         """Print verbose log messages."""
         if self.verbose:
             print(f"[DEBUG] {message}")
-    
+
     def detect(self):
         """Detect ROCm installation type and locate components."""
         # Method 1: Check for TheRock via rocm-sdk command
         if self._detect_therock_python_package():
             return
-        
+
         # Method 2: Check environment variables for TheRock
         if self._detect_therock_from_env():
             return
-        
+
         # Method 3: Check for TheRock in common paths
         if self._detect_therock_tarball():
             return
-        
+
         # Method 4: Fallback to traditional ROCm
         if self._detect_traditional_rocm():
             return
-        
+
         # Method 5: Try to find binaries in PATH
         self._detect_from_path()
-    
+
     def _is_therock_installation(self, path: Path) -> bool:
         """Check if a path contains TheRock installation markers."""
         if not path.exists():
             return False
-        
+
         # Check for TheRock manifest
         manifest_path = path / "share" / "therock" / "therock_manifest.json"
         if manifest_path.exists():
@@ -88,27 +90,27 @@ def _is_therock_installation(self, path: Path) -> bool:
             try:
                 with open(manifest_path, "r") as f:
                     manifest = json.load(f)
-                    self.therock_details['manifest'] = manifest
+                    self.therock_details["manifest"] = manifest
             except Exception as e:
                 self.log(f"Error reading manifest: {e}")
             return True
-        
+
         # Check for dist_info.json
         dist_info_path = path / "share" / "therock" / "dist_info.json"
         if dist_info_path.exists():
             self.log(f"Found TheRock dist_info at {dist_info_path}")
             return True
-        
+
         return False
-    
+
     def _detect_therock_python_package(self) -> bool:
         """Detect TheRock via Python package installation."""
         self.log("Checking for rocm-sdk command...")
-        
+
         rocm_sdk_path = shutil.which("rocm-sdk")
         if rocm_sdk_path:
             self.log(f"Found rocm-sdk at {rocm_sdk_path}")
-            
+
             try:
                 # Get root path from rocm-sdk
                 result = subprocess.run(
@@ -120,36 +122,36 @@ def _detect_therock_python_package(self) -> bool:
                 if result.returncode == 0:
                     root_path = Path(result.stdout.strip())
                     if self._is_therock_installation(root_path):
-                        self.installation_type = 'therock'
+                        self.installation_type = "therock"
                         self.rocm_root = str(root_path)
                         self._populate_therock_paths(root_path)
                         return True
             except Exception as e:
                 self.log(f"Error getting rocm-sdk path: {e}")
-        
+
         return False
-    
+
     def _detect_therock_from_env(self) -> bool:
         """Detect TheRock from environment variables."""
         self.log("Checking environment variables...")
-        
-        for var in ['ROCM_PATH', 'ROCM_HOME', 'HIP_PATH']:
+
+        for var in ["ROCM_PATH", "ROCM_HOME", "HIP_PATH"]:
             value = os.environ.get(var)
             if value:
                 path = Path(value)
                 if self._is_therock_installation(path):
                     self.log(f"Found TheRock via ${var}={value}")
-                    self.installation_type = 'therock'
+                    self.installation_type = "therock"
                     self.rocm_root = str(path)
                     self._populate_therock_paths(path)
                     return True
-        
+
         return False
-    
+
     def _detect_therock_tarball(self) -> bool:
         """Detect TheRock tarball installations in common paths."""
         self.log("Checking common TheRock installation paths...")
-        
+
         common_paths = [
             Path("/opt/rocm"),
             Path.home() / "rocm",
@@ -157,87 +159,95 @@ def _detect_therock_tarball(self) -> bool:
             Path("/usr/local/rocm"),
             Path.home() / ".local" / "rocm",
         ]
-        
+
         for path in common_paths:
             if self._is_therock_installation(path):
                 self.log(f"Found TheRock at {path}")
-                self.installation_type = 'therock'
+                self.installation_type = "therock"
                 self.rocm_root = str(path)
                 self._populate_therock_paths(path)
                 return True
-        
+
         return False
-    
+
     def _detect_traditional_rocm(self) -> bool:
         """Detect traditional ROCm installation."""
         self.log("Checking for traditional ROCm installation...")
-        
+
         # Check for traditional ROCm marker
         version_file = Path("/opt/rocm/.info/version")
         if version_file.exists():
             self.log("Found traditional ROCm at /opt/rocm")
-            self.installation_type = 'traditional'
+            self.installation_type = "traditional"
             self.rocm_root = "/opt/rocm"
             self._populate_traditional_paths()
             return True
-        
+
         return False
-    
+
     def _detect_from_path(self):
         """Try to find ROCm binaries in PATH."""
         self.log("Searching for ROCm binaries in PATH...")
-        
+
         # Try to find rocminfo
         rocminfo = shutil.which("rocminfo")
         if rocminfo:
-            self.paths['rocminfo'] = rocminfo
+            self.paths["rocminfo"] = rocminfo
             # Try to infer root from binary location
             rocminfo_path = Path(rocminfo)
             if rocminfo_path.exists():
                 potential_root = rocminfo_path.parent.parent
                 if self._is_therock_installation(potential_root):
-                    self.installation_type = 'therock'
+                    self.installation_type = "therock"
                     self.rocm_root = str(potential_root)
                     self._populate_therock_paths(potential_root)
                 else:
-                    self.installation_type = 'unknown'
+                    self.installation_type = "unknown"
                     self.rocm_root = str(potential_root)
-        
+
         # Try to find other binaries
-        self.paths['rocm_smi'] = shutil.which("rocm-smi")
-        self.paths['hipcc'] = shutil.which("hipcc")
-        self.paths['amdclang'] = shutil.which("amdclang")
-    
+        self.paths["rocm_smi"] = shutil.which("rocm-smi")
+        self.paths["hipcc"] = shutil.which("hipcc")
+        self.paths["amdclang"] = shutil.which("amdclang")
+
     def _populate_therock_paths(self, root: Path):
         """Populate paths for TheRock installation."""
         bin_dir = root / "bin"
-        
-        self.paths['rocminfo'] = str(bin_dir / "rocminfo") if (bin_dir / "rocminfo").exists() else None
-        self.paths['rocm_smi'] = str(bin_dir / "rocm-smi") if (bin_dir / "rocm-smi").exists() else None
-        self.paths['hipcc'] = str(bin_dir / "hipcc") if (bin_dir / "hipcc").exists() else None
-        self.paths['amdclang'] = str(bin_dir / "amdclang") if (bin_dir / "amdclang").exists() else None
-        
+
+        self.paths["rocminfo"] = (
+            str(bin_dir / "rocminfo") if (bin_dir / "rocminfo").exists() else None
+        )
+        self.paths["rocm_smi"] = (
+            str(bin_dir / "rocm-smi") if (bin_dir / "rocm-smi").exists() else None
+        )
+        self.paths["hipcc"] = (
+            str(bin_dir / "hipcc") if (bin_dir / "hipcc").exists() else None
+        )
+        self.paths["amdclang"] = (
+            str(bin_dir / "amdclang") if (bin_dir / "amdclang").exists() else None
+        )
+
         # Check for manifest
         manifest = root / "share" / "therock" / "therock_manifest.json"
         if manifest.exists():
-            self.paths['manifest_file'] = str(manifest)
-    
+            self.paths["manifest_file"] = str(manifest)
+
     def _populate_traditional_paths(self):
         """Populate paths for traditional ROCm installation."""
-        self.paths['rocminfo'] = "/opt/rocm/bin/rocminfo"
-        self.paths['rocm_smi'] = "/opt/rocm/bin/rocm-smi"
-        self.paths['hipcc'] = "/opt/rocm/bin/hipcc"
-        self.paths['version_file'] = "/opt/rocm/.info/version"
-    
+        self.paths["rocminfo"] = "/opt/rocm/bin/rocminfo"
+        self.paths["rocm_smi"] = "/opt/rocm/bin/rocm-smi"
+        self.paths["hipcc"] = "/opt/rocm/bin/hipcc"
+        self.paths["version_file"] = "/opt/rocm/.info/version"
+
     def get_version(self) -> str:
         """Get ROCm version string."""
-        if self.installation_type == 'therock':
+        if self.installation_type == "therock":
             return self._get_therock_version()
-        elif self.installation_type == 'traditional':
+        elif self.installation_type == "traditional":
             return self._get_traditional_version()
         else:
             return "unknown"
-    
+
     def _get_therock_version(self) -> str:
         """Get TheRock version from manifest or rocm-sdk."""
         # Try rocm-sdk command
@@ -253,14 +263,14 @@ def _get_therock_version(self) -> str:
                     return result.stdout.strip()
             except Exception:
                 pass
-        
+
         # Try manifest file
-        if self.therock_details.get('manifest'):
-            commit = self.therock_details['manifest'].get('the_rock_commit', 'unknown')
+        if self.therock_details.get("manifest"):
+            commit = self.therock_details["manifest"].get("the_rock_commit", "unknown")
             return f"TheRock (commit: {commit[:8]})"
-        
+
         return "TheRock (version unknown)"
-    
+
     def _get_traditional_version(self) -> str:
         """Get traditional ROCm version from version file or header."""
         # Try version file
@@ -270,14 +280,14 @@ def _get_traditional_version(self) -> str:
                 return version_file.read_text().strip()
             except Exception:
                 pass
-        
+
         # Try version header
         version_header = Path("/opt/rocm/include/rocm-core/rocm_version.h")
         if version_header.exists():
             try:
                 content = version_header.read_text()
                 major = minor = patch = 0
-                for line in content.split('\n'):
+                for line in content.split("\n"):
                     if "#define ROCM_VERSION_MAJOR" in line:
                         major = line.split()[-1]
                     if "#define ROCM_VERSION_MINOR" in line:
@@ -287,7 +297,7 @@ def _get_traditional_version(self) -> str:
                 return f"rocm-{major}.{minor}.{patch}"
             except Exception:
                 pass
-        
+
         return "unknown"
 
 
@@ -308,11 +318,11 @@ def print_hardware_information():
         if os.path.isfile(path):
             cmd = path
             break
-    
+
     if cmd is None:
         print("WARNING: Install lshw to get hardware information")
         print("         (TheRock images may not include this by default)")
-    
+
     if cmd is not None:
         cmd_info = CommandInfo("HardwareInformation", [cmd])
         return cmd_info
@@ -333,11 +343,11 @@ def print_cpu_hardware_information():
 def print_gpu_hardware_information(gpu_device_type, path_resolver):
     if gpu_device_type == "AMD":
         # Use dynamic path from resolver
-        cmd = path_resolver.paths.get('rocminfo') or "rocminfo"
+        cmd = path_resolver.paths.get("rocminfo") or "rocminfo"
     elif gpu_device_type == "NVIDIA":
         cmd = "nvidia-smi -L"
     else:
-        print ("WARNING: Unknown GPU device detected")
+        print("WARNING: Unknown GPU device detected")
         cmd = "echo 'Unknown GPU device'"
     cmd_info = CommandInfo("GPU Information", [cmd])
     return cmd_info
@@ -372,28 +382,28 @@ def print_memory_information():
 ## ROCm version data
 def print_rocm_version_information(path_resolver):
     global rocm_version
-    
+
     # List all ROCm-like directories
     cmd1 = "ls -v -d /opt/rocm* 2>/dev/null || echo 'No /opt/rocm* directories found'"
-    
+
     # Get version from resolver
     rocm_version = path_resolver.get_version()
-    
+
     cmd2 = f"echo '==== Installation Type: {path_resolver.installation_type} ===='"
     rocm_root_display = path_resolver.rocm_root or "Not found"
     cmd3 = f"echo '==== ROCm Root: {rocm_root_display} ===='"
     cmd4 = f"echo '==== Using {rocm_version} to collect ROCm information ===='"
-    
+
     cmds = [cmd1, cmd2, cmd3, cmd4]
-    
+
     # Add TheRock-specific info
-    if path_resolver.installation_type == 'therock':
-        manifest_file = path_resolver.paths.get('manifest_file')
+    if path_resolver.installation_type == "therock":
+        manifest_file = path_resolver.paths.get("manifest_file")
         if manifest_file:
             cmd5 = f"echo '==== TheRock Manifest: {manifest_file} ===='"
             cmd6 = f"cat {manifest_file}"
             cmds.extend([cmd5, cmd6])
-    
+
     cmd_info = CommandInfo("Available ROCm versions", cmds)
     return cmd_info
 
@@ -401,19 +411,19 @@ def print_rocm_version_information(path_resolver):
 def print_rocm_repo_setup(path_resolver):
     """Print repo setup - only for traditional ROCm installations."""
     cmds = []
-    
-    if path_resolver.installation_type == 'therock':
+
+    if path_resolver.installation_type == "therock":
         cmds.append("echo 'TheRock does not use traditional package repositories'")
         cmds.append("echo 'TheRock is installed via Python pip packages or tarballs'")
-        
+
         # Try to get pip package info
         if shutil.which("rocm-sdk"):
             cmds.append("echo 'Checking rocm-sdk Python package...'")
             cmds.append("rocm-sdk version || true")
             cmds.append("rocm-sdk path --root || true")
-        
+
         # Check if we're in a venv
-        venv_path = os.environ.get('VIRTUAL_ENV')
+        venv_path = os.environ.get("VIRTUAL_ENV")
         if venv_path:
             cmds.append(f"echo 'Virtual environment: {venv_path}'")
             cmds.append("pip list | grep -i rocm || true")
@@ -426,10 +436,10 @@ def print_rocm_repo_setup(path_resolver):
             cmd = "/bin/grep -i -E 'rocm|amdgpu' /etc/apt/sources.list.d/* || echo 'No ROCm repos found'"
         elif os.path.exists("/etc/yum.repos.d/"):
             cmd = "/bin/grep -i -E 'rocm|amdgpu' /etc/yum.repos.d/* || echo 'No ROCm repos found'"
-        
+
         if cmd:
             cmds.append(cmd)
-    
+
     cmd_info = CommandInfo("ROCm Repo Setup", cmds)
     return cmd_info
 
@@ -437,27 +447,35 @@ def print_rocm_repo_setup(path_resolver):
 def print_rocm_packages_installed(path_resolver):
     """Print installed ROCm packages - adapted for TheRock."""
     cmds = []
-    
-    if path_resolver.installation_type == 'therock':
+
+    if path_resolver.installation_type == "therock":
         # Add Pkg type line for CSV parser compatibility
         cmds.append("echo ' Pkg type: therock'")
         cmds.append("echo 'Installation Type: TheRock (no system packages)'")
         cmds.append("echo ''")
-        
+
         # Check Python packages
         cmds.append("echo '=== Python ROCm Packages ==='")
-        cmds.append("pip list 2>/dev/null | grep -i -E 'rocm|hip|torch' || echo 'No Python ROCm packages found'")
-        
+        cmds.append(
+            "pip list 2>/dev/null | grep -i -E 'rocm|hip|torch' || echo 'No Python ROCm packages found'"
+        )
+
         # List files in TheRock installation
         if path_resolver.rocm_root:
             cmds.append("echo ''")
-            cmds.append(f"echo '=== TheRock Installation Contents ({path_resolver.rocm_root}) ==='")
+            cmds.append(
+                f"echo '=== TheRock Installation Contents ({path_resolver.rocm_root}) ==='"
+            )
             cmds.append(f"ls -lh {path_resolver.rocm_root}/bin/ 2>/dev/null || true")
-            cmds.append(f"ls -lh {path_resolver.rocm_root}/lib/ 2>/dev/null | head -20 || true")
-        
+            cmds.append(
+                f"ls -lh {path_resolver.rocm_root}/lib/ 2>/dev/null | head -20 || true"
+            )
+
         # Check for dist_info
         if path_resolver.rocm_root:
-            dist_info = Path(path_resolver.rocm_root) / "share" / "therock" / "dist_info.json"
+            dist_info = (
+                Path(path_resolver.rocm_root) / "share" / "therock" / "dist_info.json"
+            )
             if dist_info.exists():
                 cmds.append("echo ''")
                 cmds.append("echo '=== TheRock Distribution Info ==='")
@@ -472,20 +490,20 @@ def print_rocm_packages_installed(path_resolver):
                         k, v = line.rstrip().split("=", 1)
                         d[k] = v.strip('"')
         except Exception:
-            d = {'ID_LIKE': 'unknown'}
-        
-        pkgtype = d.get('ID_LIKE', d.get('ID', 'unknown'))
+            d = {"ID_LIKE": "unknown"}
+
+        pkgtype = d.get("ID_LIKE", d.get("ID", "unknown"))
         # Note: Format must match csv_parser.py expectations (space before "Pkg")
         cmd1 = "echo ' Pkg type: '" + pkgtype
         cmds.append(cmd1)
-        
-        if 'debian' in pkgtype.lower():
+
+        if "debian" in pkgtype.lower():
             cmd = "/usr/bin/dpkg -l 2>/dev/null | /bin/grep -i -E 'ocl-icd|kfdtest|llvm-amd|miopen|half|^ii  hip|hcc|hsa|rocm|atmi|^ii  comgr|composa|amd-smi|aomp|amdgpu|rock|mivision|migraph|rocprofiler|roctracer|rocbl|hipify|rocsol|rocthr|rocff|rocalu|rocprim|rocrand|rccl|rocspar|rdc|rocwmma|rpp|openmp|amdfwflash|ocl |opencl' | /usr/bin/sort || echo 'No packages found'"
         else:
             cmd = "/usr/bin/rpm -qa 2>/dev/null | /bin/grep -i -E 'ocl-icd|kfdtest|llvm-amd|miopen|half|hip|hcc|hsa|rocm|atmi|comgr|composa|amd-smi|aomp|amdgpu|rock|mivision|migraph|rocprofiler|roctracer|rocblas|hipify|rocsol|rocthr|rocff|rocalu|rocprim|rocrand|rccl|rocspar|rdc|rocwmma|rpp|openmp|amdfwflash|ocl|opencl' | /usr/bin/sort || echo 'No packages found'"
-        
+
         cmds.append(cmd)
-    
+
     cmd_info = CommandInfo("ROCm Packages Installed", cmds)
     return cmd_info
 
@@ -498,23 +516,29 @@ def print_rocm_environment_variables():
 
 def print_rocm_smi_details(smi_config, path_resolver):
     cmd_info = None
-    
+
     # Use dynamic path
-    rocm_smi_cmd = path_resolver.paths.get('rocm_smi') or "rocm-smi"
-    
+    rocm_smi_cmd = path_resolver.paths.get("rocm_smi") or "rocm-smi"
+
     if smi_config == "rocm_smi":
-        cmd_info = CommandInfo("ROCm SMI", [f"{rocm_smi_cmd} || echo 'rocm-smi not available'"])
+        cmd_info = CommandInfo(
+            "ROCm SMI", [f"{rocm_smi_cmd} || echo 'rocm-smi not available'"]
+        )
     elif smi_config == "ifwi_version":
         ifwi_cmd = f"{rocm_smi_cmd} -v || echo 'IFWI version not available'"
         cmd_info = CommandInfo("IFWI version", [ifwi_cmd])
     elif smi_config == "rocm_smi_showhw":
-        showhw_cmd = f"{rocm_smi_cmd} --showhw || echo 'rocm-smi --showhw not available'"
+        showhw_cmd = (
+            f"{rocm_smi_cmd} --showhw || echo 'rocm-smi --showhw not available'"
+        )
         cmd_info = CommandInfo("ROCm SMI showhw", [showhw_cmd])
     elif smi_config == "rocm_smi_pcie":
         pcie_cmd = f"{rocm_smi_cmd} -c 2>/dev/null | /bin/grep -i -E 'pcie' || echo 'PCIe info not available'"
         cmd_info = CommandInfo("ROCm SMI pcieclk clock", [pcie_cmd])
     elif smi_config == "rocm_smi_pids":
-        pids_cmd1 = "ls /sys/class/kfd/kfd/proc/ 2>/dev/null || echo 'KFD proc not available'"
+        pids_cmd1 = (
+            "ls /sys/class/kfd/kfd/proc/ 2>/dev/null || echo 'KFD proc not available'"
+        )
         pids_cmd2 = f"{rocm_smi_cmd} --showpids || echo 'showpids not available'"
         cmd_info = CommandInfo("KFD PIDs sysfs kfd proc", [pids_cmd1, pids_cmd2])
     elif smi_config == "rocm_smi_topology":
@@ -524,13 +548,19 @@ def print_rocm_smi_details(smi_config, path_resolver):
         serial_cmd = f"{rocm_smi_cmd} --showserial || echo 'showserial not available'"
         cmd_info = CommandInfo("showserial", [serial_cmd])
     elif smi_config == "rocm_smi_showperflevel":
-        perf_cmd = f"{rocm_smi_cmd} --showperflevel || echo 'showperflevel not available'"
+        perf_cmd = (
+            f"{rocm_smi_cmd} --showperflevel || echo 'showperflevel not available'"
+        )
         cmd_info = CommandInfo("showperflevel", [perf_cmd])
     elif smi_config == "rocm_smi_showrasinfo":
-        showrasinfo_cmd = f"{rocm_smi_cmd} --showrasinfo all || echo 'showrasinfo not available'"
+        showrasinfo_cmd = (
+            f"{rocm_smi_cmd} --showrasinfo all || echo 'showrasinfo not available'"
+        )
         cmd_info = CommandInfo("ROCm SMI showrasinfo all", [showrasinfo_cmd])
     elif smi_config == "rocm_smi_showxgmierr":
-        showxgmierr_cmd = f"{rocm_smi_cmd} --showxgmierr || echo 'showxgmierr not available'"
+        showxgmierr_cmd = (
+            f"{rocm_smi_cmd} --showxgmierr || echo 'showxgmierr not available'"
+        )
         cmd_info = CommandInfo("ROCm SMI showxgmierr", [showxgmierr_cmd])
     elif smi_config == "rocm_smi_clocks":
         clock_cmd = f"{rocm_smi_cmd} -cga || echo 'clock info not available'"
@@ -539,19 +569,23 @@ def print_rocm_smi_details(smi_config, path_resolver):
         compute_cmd = f"{rocm_smi_cmd} --showcomputepartition || echo 'showcomputepartition not available'"
         cmd_info = CommandInfo("ROCm Show computepartition", [compute_cmd])
     elif smi_config == "rocm_smi_nodesbw":
-        nodesbw_cmd = f"{rocm_smi_cmd} --shownodesbw || echo 'shownodesbw not available'"
+        nodesbw_cmd = (
+            f"{rocm_smi_cmd} --shownodesbw || echo 'shownodesbw not available'"
+        )
         cmd_info = CommandInfo("ROCm Show Nodebsion", [nodesbw_cmd])
     elif smi_config == "rocm_smi_gpudeviceid":
-        gpudeviceid_cmd = f"{rocm_smi_cmd} -i -d 0 || echo 'GPU device ID not available'"
+        gpudeviceid_cmd = (
+            f"{rocm_smi_cmd} -i -d 0 || echo 'GPU device ID not available'"
+        )
         cmd_info = CommandInfo("ROCM Show GPU Device ID", [gpudeviceid_cmd])
     else:
         cmd_info = None
-    
+
     return cmd_info
 
 
 def print_rocm_info_details(path_resolver):
-    rocminfo_cmd = path_resolver.paths.get('rocminfo') or "rocminfo"
+    rocminfo_cmd = path_resolver.paths.get("rocminfo") or "rocminfo"
     cmd = f"{rocminfo_cmd} || echo 'rocminfo not available'"
     cmd_info = CommandInfo("rocminfo", [cmd])
     return cmd_info
@@ -566,25 +600,38 @@ def print_dmesg_logs(ignore_prev_boot_logs=True):
         cmd1_str = "WARNING: Persistent logging possibly disabled.\\n"
         cmd1_str = cmd1_str + "WARNING: Please run: \\n"
         cmd1_str = cmd1_str + "       sudo mkdir -p /var/log/journal\\n"
-        cmd1_str = cmd1_str + "       sudo systemctl restart systemd-journald.service \\n"
-        cmd1_str = cmd1_str + "WARNING: to enable persistent boot logs for collection and analysis.\\n"
+        cmd1_str = (
+            cmd1_str + "       sudo systemctl restart systemd-journald.service \\n"
+        )
+        cmd1_str = (
+            cmd1_str
+            + "WARNING: to enable persistent boot logs for collection and analysis.\\n"
+        )
         cmd1_str = "echo '" + cmd1_str + "'"
         cmds.append(cmd1_str)
 
     cmds.append("echo 'Section: dmesg boot logs'")
-    cmds.append("/bin/dmesg -T 2>/dev/null | /bin/grep -i -E ' Linux v| Command line|power|pnp|pci|gpu|drm|error|xgmi|panic|watchdog|bug|nmi|dazed|too|mce|edac|oop|fail|fault|atom|bios|kfd|vfio|iommu|ras_mask|ECC|smpboot.*CPU|pcieport.*AER|amdfwflash' || echo 'dmesg not available'")
-    
+    cmds.append(
+        "/bin/dmesg -T 2>/dev/null | /bin/grep -i -E ' Linux v| Command line|power|pnp|pci|gpu|drm|error|xgmi|panic|watchdog|bug|nmi|dazed|too|mce|edac|oop|fail|fault|atom|bios|kfd|vfio|iommu|ras_mask|ECC|smpboot.*CPU|pcieport.*AER|amdfwflash' || echo 'dmesg not available'"
+    )
+
     if not ignore_prev_boot_logs:
         cmd_exec = shutil.which("journalctl")
-        
+
         if cmd_exec is not None:
             cmds.append("echo 'Section: Current boot logs'")
             boot_exec = "/bin/grep -i -E ' Linux v| Command line|power|pnp|pci|gpu|drm|error|xgmi|panic|watchdog|bug|nmi|dazed|too|mce|edac|oop|fail|fault|atom|bios|kfd|vfio|iommu|ras_mask|ECC|smpboot.*CPU|pcieport.*AER|amdfwflash'"
-            cmds.append(f"{cmd_exec} -b 2>/dev/null | {boot_exec} || echo 'journalctl not available'")
+            cmds.append(
+                f"{cmd_exec} -b 2>/dev/null | {boot_exec} || echo 'journalctl not available'"
+            )
             cmds.append("echo 'Section: Previous boot logs'")
-            cmds.append(f"{cmd_exec} -b 1 2>/dev/null | {boot_exec} || echo 'Previous boot logs not available'")
+            cmds.append(
+                f"{cmd_exec} -b 1 2>/dev/null | {boot_exec} || echo 'Previous boot logs not available'"
+            )
             cmds.append("echo 'Section: Second boot logs'")
-            cmds.append(f"{cmd_exec} -b 2 2>/dev/null | {boot_exec} || echo 'Second boot logs not available'")
+            cmds.append(
+                f"{cmd_exec} -b 2 2>/dev/null | {boot_exec} || echo 'Second boot logs not available'"
+            )
 
     cmd_info = CommandInfo("dmesg GPU/DRM/ATOM/BIOS", cmds)
     return cmd_info
@@ -631,21 +678,23 @@ def print_cuda_packages_installed():
                 if "=" in line:
                     k, v = line.rstrip().split("=", 1)
                     d[k] = v.strip('"')
-        
-        pkgtype = d.get('ID_LIKE', d.get('ID', 'unknown'))
+
+        pkgtype = d.get("ID_LIKE", d.get("ID", "unknown"))
         # Note: Format must match csv_parser.py expectations (space before "Pkg")
         cmd1 = "echo ' Pkg type: '" + pkgtype
         cmd2 = None
-        
-        if 'debian' in pkgtype.lower():
+
+        if "debian" in pkgtype.lower():
             cmd2 = "/usr/bin/dpkg -l 2>/dev/null | /bin/grep -i -E 'cuda|cu|atlas|hdf5|nccl|nvinfer|nvjpeg|onnx' || echo 'No CUDA packages found'"
         else:
             cmd2 = "/usr/bin/rpm -qa 2>/dev/null | /bin/grep -i -E 'cuda|cu|atlas|hdf5|nccl|nvinfer|nvjpeg|onnx' || echo 'No CUDA packages found'"
-        
+
         cmd_info = CommandInfo("CUDA Packages Installed", [cmd1, cmd2])
     except Exception as e:
-        cmd_info = CommandInfo("CUDA Packages Installed", [f"echo 'Error checking packages: {e}'"])
-    
+        cmd_info = CommandInfo(
+            "CUDA Packages Installed", [f"echo 'Error checking packages: {e}'"]
+        )
+
     return cmd_info
 
 
@@ -663,7 +712,7 @@ def dump_system_env_information(configs, output_name):
             out_path = os.path.join(out_dir, config)
             os.makedirs(out_path)
             log_file = out_path + "/" + config + ".txt"
-            fs = open(log_file, 'w')
+            fs = open(log_file, "w")
 
             cmd_info = env_map[config]
             if cmd_info is not None:
@@ -683,63 +732,91 @@ def dump_system_env_information(configs, output_name):
 
 def determine_gpu_device_type(path_resolver):
     gpu_device_type = ""
-    
+
     # Try rocm-smi
-    rocm_smi_cmd = path_resolver.paths.get('rocm_smi') or "rocm-smi"
+    rocm_smi_cmd = path_resolver.paths.get("rocm_smi") or "rocm-smi"
     rocm_smi_out = console.sh(f"{rocm_smi_cmd} 2>/dev/null || true", canFail=True)
-    
+
     # Try nvidia-smi
     nv_smi_out = console.sh("nvidia-smi -L 2>/dev/null || true", canFail=True)
-    
+
     if rocm_smi_out and "not found" not in rocm_smi_out and len(rocm_smi_out) > 10:
         gpu_device_type = "AMD"
     elif nv_smi_out and "not found" not in nv_smi_out and len(nv_smi_out) > 10:
         gpu_device_type = "NVIDIA"
-    
+
     return gpu_device_type
 
 
 def generate_env_info(gpu_device_type, path_resolver):
     global env_map
-    
+
     print(f"Installation Type: {path_resolver.installation_type}")
     print(f"ROCm Root: {path_resolver.rocm_root or 'Not found'}")
     print(f"GPU Device Type: {gpu_device_type or 'Unknown'}")
-    
+
     env_map["hardware_information"] = print_hardware_information()
     env_map["cpu_information"] = print_cpu_hardware_information()
-    env_map["gpu_information"] = print_gpu_hardware_information(gpu_device_type, path_resolver)
+    env_map["gpu_information"] = print_gpu_hardware_information(
+        gpu_device_type, path_resolver
+    )
     env_map["bios_settings"] = print_bios_settings()
     env_map["os_information"] = print_os_information()
     env_map["dmsg_gpu_drm_atom_logs"] = print_dmesg_logs(ignore_prev_boot_logs=True)
     env_map["amdgpu_modinfo"] = print_amdgpu_modinfo()
     env_map["memory_information"] = print_memory_information()
-    
+
     if gpu_device_type == "AMD":
         env_map["rocm_information"] = print_rocm_version_information(path_resolver)
         env_map["rocm_repo_setup"] = print_rocm_repo_setup(path_resolver)
-        env_map["rocm_packages_installed"] = print_rocm_packages_installed(path_resolver)
+        env_map["rocm_packages_installed"] = print_rocm_packages_installed(
+            path_resolver
+        )
         env_map["rocm_env_variables"] = print_rocm_environment_variables()
         env_map["rocm_smi"] = print_rocm_smi_details("rocm_smi", path_resolver)
         env_map["ifwi_version"] = print_rocm_smi_details("ifwi_version", path_resolver)
-        env_map["rocm_smi_showhw"] = print_rocm_smi_details("rocm_smi_showhw", path_resolver)
-        env_map["rocm_smi_pcie"] = print_rocm_smi_details("rocm_smi_pcie", path_resolver)
-        env_map["rocm_smi_pids"] = print_rocm_smi_details("rocm_smi_pids", path_resolver)
-        env_map["rocm_smi_topology"] = print_rocm_smi_details("rocm_smi_topology", path_resolver)
-        env_map["rocm_smi_showserial"] = print_rocm_smi_details("rocm_smi_showserial", path_resolver)
-        env_map["rocm_smi_showperflevel"] = print_rocm_smi_details("rocm_smi_showperflevel", path_resolver)
-        env_map["rocm_smi_showrasinfo"] = print_rocm_smi_details("rocm_smi_showrasinfo", path_resolver)
-        env_map["rocm_smi_showxgmierr"] = print_rocm_smi_details("rocm_smi_showxgmierr", path_resolver)
-        env_map["rocm_smi_clocks"] = print_rocm_smi_details("rocm_smi_clocks", path_resolver)
-        env_map["rocm_smi_showcompute_partition"] = print_rocm_smi_details("rocm_smi_showcompute_partition", path_resolver)
-        env_map["rocm_smi_nodesbwi"] = print_rocm_smi_details("rocm_smi_nodesbw", path_resolver)
-        env_map["rocm_smi_gpudeviceid"] = print_rocm_smi_details("rocm_smi_gpudeviceid", path_resolver)
+        env_map["rocm_smi_showhw"] = print_rocm_smi_details(
+            "rocm_smi_showhw", path_resolver
+        )
+        env_map["rocm_smi_pcie"] = print_rocm_smi_details(
+            "rocm_smi_pcie", path_resolver
+        )
+        env_map["rocm_smi_pids"] = print_rocm_smi_details(
+            "rocm_smi_pids", path_resolver
+        )
+        env_map["rocm_smi_topology"] = print_rocm_smi_details(
+            "rocm_smi_topology", path_resolver
+        )
+        env_map["rocm_smi_showserial"] = print_rocm_smi_details(
+            "rocm_smi_showserial", path_resolver
+        )
+        env_map["rocm_smi_showperflevel"] = print_rocm_smi_details(
+            "rocm_smi_showperflevel", path_resolver
+        )
+        env_map["rocm_smi_showrasinfo"] = print_rocm_smi_details(
+            "rocm_smi_showrasinfo", path_resolver
+        )
+        env_map["rocm_smi_showxgmierr"] = print_rocm_smi_details(
+            "rocm_smi_showxgmierr", path_resolver
+        )
+        env_map["rocm_smi_clocks"] = print_rocm_smi_details(
+            "rocm_smi_clocks", path_resolver
+        )
+        env_map["rocm_smi_showcompute_partition"] = print_rocm_smi_details(
+            "rocm_smi_showcompute_partition", path_resolver
+        )
+        env_map["rocm_smi_nodesbwi"] = print_rocm_smi_details(
+            "rocm_smi_nodesbw", path_resolver
+        )
+        env_map["rocm_smi_gpudeviceid"] = print_rocm_smi_details(
+            "rocm_smi_gpudeviceid", path_resolver
+        )
         env_map["rocm_info"] = print_rocm_info_details(path_resolver)
     elif gpu_device_type == "NVIDIA":
         env_map["cuda_information"] = print_cuda_version_information()
         env_map["cuda_env_variables"] = print_cuda_env_variables()
         env_map["cuda_packages_installed"] = print_cuda_packages_installed()
-    
+
     env_map["pip_list"] = print_pip_list_details()
 
     if os.path.exists("/proc/sys/kernel/numa_balancing"):
@@ -749,22 +826,22 @@ def generate_env_info(gpu_device_type, path_resolver):
 def main():
     # Initialize path resolver
     path_resolver = RocmPathResolver(verbose=args.verbose)
-    
+
     # Detect GPU type with resolver
     gpu_device_type = determine_gpu_device_type(path_resolver)
-    
+
     # Generate environment info
     generate_env_info(gpu_device_type, path_resolver)
-    
+
     # Get configs
     configs = env_map.keys()
     if args.lite:
         configs = parse_env_tags_json("env_tags.json")
-    
+
     # Dump system environment information
     dump_system_env_information(configs, args.output_name)
     print(f"OK: finished dumping the system env details in .{args.output_name} folder")
-    
+
     # CSV output
     if args.dump_csv or args.print_csv:
         csv_file = args.output_name + ".csv"
@@ -775,21 +852,31 @@ def main():
             csv_parser.print_csv_output()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="System environment data collection tool (TheRock + Traditional ROCm compatible)"
     )
-    parser.add_argument("--lite", action="store_true", 
-                       help="System environment data lite version taken from env_tags.json")
-    parser.add_argument("--dump-csv", action="store_true", 
-                       help="Dump system config info in CSV file")
-    parser.add_argument("--print-csv", action="store_true", 
-                       help="Print system config info data")
-    parser.add_argument("--output-name", required=False, default="sys_config_info", 
-                       help="Output file or directory name")
-    parser.add_argument("-v", "--verbose", action="store_true", 
-                       help="Enable verbose detection output")
-    
+    parser.add_argument(
+        "--lite",
+        action="store_true",
+        help="System environment data lite version taken from env_tags.json",
+    )
+    parser.add_argument(
+        "--dump-csv", action="store_true", help="Dump system config info in CSV file"
+    )
+    parser.add_argument(
+        "--print-csv", action="store_true", help="Print system config info data"
+    )
+    parser.add_argument(
+        "--output-name",
+        required=False,
+        default="sys_config_info",
+        help="Output file or directory name",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable verbose detection output"
+    )
+
     args = parser.parse_args()
     console = Console(shellVerbose=False, live_output=False)
 
diff --git a/src/madengine/scripts/common/pre_scripts/rocEnvTool/test_rocenv.sh b/src/madengine/scripts/common/pre_scripts/rocEnvTool/test_rocenv.sh
index a817001e..84c40a01 100644
--- a/src/madengine/scripts/common/pre_scripts/rocEnvTool/test_rocenv.sh
+++ b/src/madengine/scripts/common/pre_scripts/rocEnvTool/test_rocenv.sh
@@ -101,11 +101,11 @@ echo
 echo "Test 7: Output directory validation"
 if [ -d ".test_basic" ]; then
     pass "Output directory created"
-    
+
     # Count subdirectories
     NUM_SECTIONS=$(find .test_basic -mindepth 1 -maxdepth 1 -type d | wc -l)
     info "Generated $NUM_SECTIONS information sections"
-    
+
     if [ "$NUM_SECTIONS" -gt 5 ]; then
         pass "Sufficient sections generated ($NUM_SECTIONS)"
     else
@@ -136,16 +136,16 @@ echo
 echo "Test 9: ROCm-specific sections"
 if [ -d ".test_basic/rocm_information" ]; then
     pass "ROCm information section generated"
-    
+
     # Check content
     if [ -f ".test_basic/rocm_information/rocm_information.txt" ]; then
         CONTENT=$(cat .test_basic/rocm_information/rocm_information.txt)
-        
+
         if echo "$CONTENT" | grep -q "Installation Type:"; then
             DETECTED_TYPE=$(echo "$CONTENT" | grep "Installation Type:" | head -1)
             pass "ROCm installation type detected: $DETECTED_TYPE"
         fi
-        
+
         if echo "$CONTENT" | grep -q "ROCm Root:"; then
             DETECTED_ROOT=$(echo "$CONTENT" | grep "ROCm Root:" | head -1)
             pass "ROCm root identified: $DETECTED_ROOT"
@@ -161,10 +161,10 @@ echo "Test 10: CSV generation"
 if python3 rocenv_tool_v2.py --output-name test_csv --dump-csv > /dev/null 2>&1; then
     if [ -f "test_csv.csv" ]; then
         pass "CSV file generated"
-        
+
         LINE_COUNT=$(wc -l < test_csv.csv)
         info "CSV contains $LINE_COUNT lines"
-        
+
         if [ "$LINE_COUNT" -gt 10 ]; then
             pass "CSV contains data"
         fi
@@ -231,4 +231,3 @@ echo "- README_v2.md - Usage guide"
 echo "- THEROCK_COMPATIBILITY.md - Compatibility details"
 echo "- IMPLEMENTATION_SUMMARY.md - Implementation overview"
 echo
-
diff --git a/src/madengine/scripts/common/pre_scripts/run_rocenv_tool.sh b/src/madengine/scripts/common/pre_scripts/run_rocenv_tool.sh
index 84879d05..95ac8042 100644
--- a/src/madengine/scripts/common/pre_scripts/run_rocenv_tool.sh
+++ b/src/madengine/scripts/common/pre_scripts/run_rocenv_tool.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 OUTPUT_FILE_NAME=${1:-"sys_config_info"}
 
diff --git a/src/madengine/scripts/common/pre_scripts/trace.sh b/src/madengine/scripts/common/pre_scripts/trace.sh
index 5c591c83..8a8a16ca 100644
--- a/src/madengine/scripts/common/pre_scripts/trace.sh
+++ b/src/madengine/scripts/common/pre_scripts/trace.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 set -e
 set -x
@@ -40,7 +40,7 @@ rpd)
 	else
 		echo "rocmProfileData directory already exists, skipping clone"
 	fi
-	
+
 	# Build RPD tracer locally without system install
 	cd ./rocmProfileData
 	# Workaround for upstream rocmProfileData Makefile typo: UStringTable.o -> StringTable.o
@@ -52,7 +52,7 @@ rpd)
 		echo "Error: Failed to build RPD tracer"
 		exit 1
 	fi
-	
+
 	# Install rocpd Python module locally
 	cd rocpd_python
 	python3 setup.py install
@@ -61,7 +61,7 @@ rpd)
 		exit 1
 	fi
 	cd ../..
-	
+
 	echo "RPD setup completed successfully"
 	;;
 
diff --git a/src/madengine/scripts/common/test_echo.sh b/src/madengine/scripts/common/test_echo.sh
index 01c2830b..f9e3a7e0 100644
--- a/src/madengine/scripts/common/test_echo.sh
+++ b/src/madengine/scripts/common/test_echo.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "$@"
diff --git a/src/madengine/scripts/common/tools.json b/src/madengine/scripts/common/tools.json
index 82869087..ecb2edb7 100644
--- a/src/madengine/scripts/common/tools.json
+++ b/src/madengine/scripts/common/tools.json
@@ -340,4 +340,4 @@
       "post_scripts": []
     }
   }
-}
\ No newline at end of file
+}
diff --git a/src/madengine/scripts/common/tools/amd_smi_utils.py b/src/madengine/scripts/common/tools/amd_smi_utils.py
index e0e48096..2057b4c7 100644
--- a/src/madengine/scripts/common/tools/amd_smi_utils.py
+++ b/src/madengine/scripts/common/tools/amd_smi_utils.py
@@ -6,34 +6,33 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-import sys
 import logging
-from typing import List, Optional, Dict, Any
+import sys
+from typing import Any, Dict, List, Optional
 
 sys.path.append("/opt/rocm/libexec/amdsmi_cli/")
 try:
-    from amdsmi_init import amdsmi_interface
-    from amdsmi_init import amdsmi_cli_init, amdsmi_cli_shutdown
+    from amdsmi_init import amdsmi_cli_init, amdsmi_cli_shutdown, amdsmi_interface
 except ImportError:
     raise ImportError("Could not import /opt/rocm/libexec/amdsmi_cli/amdsmi_init.py")
 
 
 class ProfUtils:
     """Class to get GPU information using AMD amd-smi utility.
-    
+
     Attributes:
         amdsmi_initialized: Whether amdsmi interface is initialized.
         processor_handles: List of GPU processor handles.
     """
-    
+
     def __init__(self, mode) -> None:
         """Initialize the amd-smi utils class
-        
+
         @param mode: Mode parameter for compatibility (not used in amd-smi)
         """
         self.amdsmi_initialized = False
         self.processor_handles = []
-        
+
         try:
             # Initialize amdsmi using the amdsmi_cli_init function
             amdsmi_cli_init()
@@ -41,7 +40,7 @@ def __init__(self, mode) -> None:
             logging.debug("amdsmi_cli_init() successful")
         except Exception as e:
             raise ImportError(f"Failed to initialize amd-smi interface: {e}")
-        
+
         try:
             # Get processor handles (GPU devices)
             self.processor_handles = amdsmi_interface.amdsmi_get_processor_handles()
@@ -53,37 +52,37 @@ def __init__(self, mode) -> None:
 
     def get_power(self, device: int) -> str:
         """Get current socket power of a given device.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Power consumption in watts as string, or 'N/A' if unavailable.
         """
         try:
             if device >= len(self.processor_handles):
-                return 'N/A'
-            
+                return "N/A"
+
             processor_handle = self.processor_handles[device]
             power_info = amdsmi_interface.amdsmi_get_power_info(processor_handle)
-            
+
             # power_info is a dict with keys like 'current_socket_power', 'average_socket_power', etc.
             # Values are in milliwatts, convert to watts
-            if 'current_socket_power' in power_info:
-                power_mw = power_info['current_socket_power']
+            if "current_socket_power" in power_info:
+                power_mw = power_info["current_socket_power"]
                 return str(float(power_mw) / 1000.0)
-            elif 'average_socket_power' in power_info:
-                power_mw = power_info['average_socket_power']
+            elif "average_socket_power" in power_info:
+                power_mw = power_info["average_socket_power"]
                 return str(float(power_mw) / 1000.0)
-            
-            return 'N/A'
+
+            return "N/A"
         except Exception as e:
             logging.debug(f"Failed to get power for device {device}: {e}")
-            return 'N/A'
+            return "N/A"
 
     def list_devices(self) -> List[int]:
         """Get list of GPU device indices.
-        
+
         Returns:
             List of device indices.
         """
@@ -92,29 +91,33 @@ def list_devices(self) -> List[int]:
 
     def get_mem_info(self, device: int) -> float:
         """Get memory usage percentage for a device.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Memory usage percentage as float.
         """
         try:
             if device >= len(self.processor_handles):
                 return 0.0
-            
+
             processor_handle = self.processor_handles[device]
-            
+
             # Try to get VRAM usage directly
             vram_info = amdsmi_interface.amdsmi_get_gpu_vram_usage(processor_handle)
-            
+
             # vram_info is a dict with 'vram_used' and 'vram_total' in bytes
-            if isinstance(vram_info, dict) and 'vram_used' in vram_info and 'vram_total' in vram_info:
-                used = float(vram_info['vram_used'])
-                total = float(vram_info['vram_total'])
+            if (
+                isinstance(vram_info, dict)
+                and "vram_used" in vram_info
+                and "vram_total" in vram_info
+            ):
+                used = float(vram_info["vram_used"])
+                total = float(vram_info["vram_total"])
                 if total > 0:
                     return round((used / total) * 100, 2)
-            
+
             return 0.0
         except Exception as e:
             logging.debug(f"Failed to get memory info for device {device}: {e}")
@@ -122,41 +125,45 @@ def get_mem_info(self, device: int) -> float:
 
     def check_if_secondary_die(self, device: int) -> bool:
         """Check if GPU device is the secondary die in a MCM.
-        
+
         MI200 device specific feature check.
         The secondary dies lack power management features.
-        
+
         Args:
             device: The device to check.
-            
+
         Returns:
             True if secondary die, False otherwise.
         """
         try:
             if device >= len(self.processor_handles):
                 return False
-            
+
             processor_handle = self.processor_handles[device]
-            
+
             # Check if power management is enabled - secondary dies typically don't have it
-            is_power_mgmt_enabled = amdsmi_interface.amdsmi_is_gpu_power_management_enabled(processor_handle)
+            is_power_mgmt_enabled = (
+                amdsmi_interface.amdsmi_is_gpu_power_management_enabled(
+                    processor_handle
+                )
+            )
             if not is_power_mgmt_enabled:
                 return True
-            
+
             # Alternative check: get power info and see if it's zero/unavailable
             try:
                 power_info = amdsmi_interface.amdsmi_get_power_info(processor_handle)
                 if isinstance(power_info, dict):
                     # If both current and average power are 0, it's likely a secondary die
-                    current_power = power_info.get('current_socket_power', -1)
-                    avg_power = power_info.get('average_socket_power', -1)
+                    current_power = power_info.get("current_socket_power", -1)
+                    avg_power = power_info.get("average_socket_power", -1)
                     if current_power == 0 and avg_power == 0:
                         return True
             except Exception:
                 # If we can't get power info, might be secondary die
                 return True
-            
+
             return False
         except Exception as e:
             logging.debug(f"Failed to check secondary die for device {device}: {e}")
-            return False
\ No newline at end of file
+            return False
diff --git a/src/madengine/scripts/common/tools/detect_therock.sh b/src/madengine/scripts/common/tools/detect_therock.sh
index 2e04d2d1..b670cf09 100755
--- a/src/madengine/scripts/common/tools/detect_therock.sh
+++ b/src/madengine/scripts/common/tools/detect_therock.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #
 # Quick TheRock ROCm Detection Script
-# 
+#
 # This script checks if TheRock is installed on the system.
 # TheRock does NOT use apt - it uses Python pip or tarballs.
 #
@@ -26,34 +26,34 @@ echo ""
 check_therock_path() {
     path="$1"
     label="$2"
-    
+
     if [ ! -d "$path" ]; then
         return 1
     fi
-    
+
     manifest="$path/share/therock/therock_manifest.json"
     dist_info="$path/share/therock/dist_info.json"
-    
+
     if [ -f "$manifest" ]; then
         printf "${GREEN}✓ Found TheRock installation${NC}\n"
         echo "  Type: $label"
         echo "  Path: $path"
-        
+
         if [ -f "$dist_info" ]; then
             targets=$(grep -oP '(?<="dist_amdgpu_targets": ")[^"]*' "$dist_info" 2>/dev/null || echo "unknown")
             echo "  GPU Targets: $targets"
         fi
-        
+
         if command -v jq > /dev/null 2>&1; then
             commit=$(jq -r '.the_rock_commit' "$manifest" 2>/dev/null || echo "unknown")
             echo "  Commit: $commit"
         fi
-        
+
         echo ""
         FOUND=$((FOUND + 1))
         return 0
     fi
-    
+
     return 1
 }
 
@@ -61,11 +61,11 @@ check_therock_path() {
 printf "${BLUE}[1] Checking for rocm-sdk command...${NC}\n"
 if command -v rocm-sdk > /dev/null 2>&1; then
     printf "${GREEN}✓ Found rocm-sdk command${NC}\n"
-    
+
     # Get version
     version=$(rocm-sdk version 2>/dev/null || echo "unknown")
     echo "  Version: $version"
-    
+
     # Get root path
     if root_path=$(rocm-sdk path --root 2>/dev/null); then
         echo "  Root: $root_path"
@@ -82,7 +82,7 @@ if python3 -c "import rocm_sdk" 2>/dev/null; then
     version=$(python3 -c "import rocm_sdk; print(rocm_sdk.__version__)" 2>/dev/null || echo "unknown")
     printf "${GREEN}✓ Found rocm_sdk Python package${NC}\n"
     echo "  Version: $version"
-    
+
     # Try to find the package path
     pkg_path=$(python3 -c "
 import importlib.util
@@ -91,7 +91,7 @@ spec = importlib.util.find_spec('_rocm_sdk_core')
 if spec and spec.origin:
     print(pathlib.Path(spec.origin).parent)
 " 2>/dev/null || echo "")
-    
+
     if [ -n "$pkg_path" ]; then
         check_therock_path "$pkg_path" "Python Package"
     fi
@@ -132,7 +132,7 @@ if [ -f "version.json" ] && [ -f "CMakeLists.txt" ]; then
     if grep -q "rocm-version" version.json 2>/dev/null; then
         printf "${YELLOW}✓ Found TheRock source directory${NC}\n"
         echo "  Path: $(pwd)"
-        
+
         if [ -d "build/dist" ]; then
             for dist_dir in build/dist/*; do
                 if [ -d "$dist_dir" ]; then
@@ -173,4 +173,3 @@ else
     echo "More info: https://github.com/ROCm/TheRock/blob/main/RELEASES.md"
     exit 1
 fi
-
diff --git a/src/madengine/scripts/common/tools/get_library_trace.py b/src/madengine/scripts/common/tools/get_library_trace.py
index d011e643..f614d786 100644
--- a/src/madengine/scripts/common/tools/get_library_trace.py
+++ b/src/madengine/scripts/common/tools/get_library_trace.py
@@ -5,17 +5,17 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-# built-in modules
-import sys
+import csv
 import io
 import os
 import re
-from datetime import datetime
-import csv
 import subprocess
-from contextlib import redirect_stdout, redirect_stderr
-import typing
 
+# built-in modules
+import sys
+import typing
+from contextlib import redirect_stderr, redirect_stdout
+from datetime import datetime
 
 # Global variables of the trace mode
 mode = os.environ.get("TRACE_MODE", "").replace(" ", "").split(",")
@@ -24,7 +24,7 @@
     mode.append("rocblas_trace")
 
 if os.environ.get("HIPBLASLT_TRACE"):
-    mode.append("hipblaslt_trace")    
+    mode.append("hipblaslt_trace")
 
 if os.environ.get("TENSILE_TRACE"):
     mode.append("tensile_trace")
@@ -175,22 +175,20 @@ def process_miopen_trace(output_lines: list) -> bool:
 
 class LibraryFilter(object):
     """Class to filter the library trace information
-    
+
     This class filters the library trace information based on the mode
-    
+
     Args:
         mode: Mode of the trace
         liveOutput: Boolean value
         printConfigs: Boolean value
     """
+
     def __init__(
-            self, 
-            mode: str, 
-            liveOutput: bool=False, 
-            printConfigs: bool=False
-        ) -> None:
+        self, mode: str, liveOutput: bool = False, printConfigs: bool = False
+    ) -> None:
         """Initialize the LibraryFilter class
-        
+
         Args:
             mode: Mode of the trace
             liveOutput: Boolean value
@@ -206,17 +204,14 @@ def __init__(
 
         self.printConfigs = printConfigs
 
-    def write(
-            self, 
-            data: str
-        ) -> None:
+    def write(self, data: str) -> None:
         """Write the data
-        
+
         This function writes the data
-        
+
         Args:
             data: Data to write
-        
+
         Returns:
             None
         """
@@ -230,8 +225,8 @@ def write(
             matched |= r_match
 
         if "hipblaslt_trace" in mode:
-            r_match = process_hipblaslt_trace(data.splitlines() )
-            matched |= r_match 
+            r_match = process_hipblaslt_trace(data.splitlines())
+            matched |= r_match
 
         if "tensile_trace" in mode:
             t_match = process_tensile_trace(data.splitlines())
@@ -256,19 +251,17 @@ def flush(self):
 
 
 def run_command(
-        commandstring: str, 
-        request_env: typing.Dict[str, str],
-        outlog: typing.Any
-    ):
+    commandstring: str, request_env: typing.Dict[str, str], outlog: typing.Any
+):
     """Run the command
-    
+
     This function runs the command
-    
+
     Args:
         commandstring: Command string
         request_env: Request environment
         outlog: Output log
-    
+
     Returns:
         None
     """
@@ -278,20 +271,20 @@ def run_command(
     # Run subprocess with STDOUT (not PIPE) so output goes directly to our stdout
     # This avoids buffering issues with nested processes
     process = subprocess.Popen(
-        commandstring, 
-        shell=True, 
+        commandstring,
+        shell=True,
         env=modified_env,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,  # Merge stderr into stdout
         universal_newlines=True,
-        bufsize=1  # Line buffered
+        bufsize=1,  # Line buffered
     )
-    
+
     # Stream output line by line
     for line in process.stdout:
         outlog.write(line)
         outlog.flush()
-    
+
     # Wait for process to complete
     process.wait()
 
diff --git a/src/madengine/scripts/common/tools/gpu_info_profiler.py b/src/madengine/scripts/common/tools/gpu_info_profiler.py
index 111f655d..194fb3ba 100644
--- a/src/madengine/scripts/common/tools/gpu_info_profiler.py
+++ b/src/madengine/scripts/common/tools/gpu_info_profiler.py
@@ -5,24 +5,25 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+import csv
+import datetime
+import logging
+
 # built-in modules
 import multiprocessing
-import threading
-import time
-import datetime
+import os
+import signal
 import subprocess
 import sys
-import csv
-import os
-import logging
+import threading
+import time
 import typing
-import signal
-from typing import Optional, List, Dict, Any
+from typing import Any, Dict, List, Optional
 
 
 def check_amd_smi_available() -> bool:
     """Check if amd-smi command or Python bindings are available.
-    
+
     Returns:
         bool: True if amd-smi is available, False otherwise.
     """
@@ -30,69 +31,74 @@ def check_amd_smi_available() -> bool:
     try:
         sys.path.append("/opt/rocm/libexec/amdsmi_cli/")
         from amdsmi_init import amdsmi_interface
+
         logging.debug("amd-smi Python bindings found at /opt/rocm/libexec/amdsmi_cli/")
         return True
     except ImportError:
         logging.debug("amd-smi Python bindings not found")
-    
+
     # Fallback to checking command-line tool
     try:
         result = subprocess.run(
-            ['amd-smi', '--version'],
-            capture_output=True,
-            text=True,
-            timeout=10
+            ["amd-smi", "--version"], capture_output=True, text=True, timeout=10
         )
         if result.returncode == 0:
             logging.debug("amd-smi command-line tool found")
             return True
-    except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
+    except (
+        subprocess.TimeoutExpired,
+        FileNotFoundError,
+        subprocess.SubprocessError,
+    ) as e:
         logging.debug(f"amd-smi command not available: {e}")
-    
+
     return False
 
+
 def get_rocm_version() -> Optional[float]:
     """Get ROCm version from system.
-    
+
     Returns:
         Optional[float]: ROCm version as major.minor (e.g., 6.1), or None if not detected.
     """
     try:
         # Try hipconfig --version first (more reliable)
         result = subprocess.run(
-            ['hipconfig', '--version'],
-            capture_output=True,
-            text=True,
-            timeout=10
+            ["hipconfig", "--version"], capture_output=True, text=True, timeout=10
         )
         if result.returncode == 0:
             # example output: 6.1.40092-038397aaa
             version_str = result.stdout.strip()
-            version_parts = version_str.split('.')[:2]  # Get major.minor
-            return float('.'.join(version_parts))
+            version_parts = version_str.split(".")[:2]  # Get major.minor
+            return float(".".join(version_parts))
     except (subprocess.SubprocessError, ValueError, IndexError) as e:
         logging.debug(f"hipconfig check failed: {e}")
-    
+
     try:
         # Fallback to /opt/rocm/.info/version
         if os.path.exists("/opt/rocm/.info/version"):
-            result = subprocess.run(['cat', '/opt/rocm/.info/version'], 
-                                  capture_output=True, text=True, timeout=10)
+            result = subprocess.run(
+                ["cat", "/opt/rocm/.info/version"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
             if result.returncode == 0:
-                version_str = result.stdout.strip().split('-')[0]  # Remove build suffix
-                version_parts = version_str.split('.')[:2]  # Get major.minor
-                return float('.'.join(version_parts))
+                version_str = result.stdout.strip().split("-")[0]  # Remove build suffix
+                version_parts = version_str.split(".")[:2]  # Get major.minor
+                return float(".".join(version_parts))
     except (IOError, ValueError, IndexError) as e:
         logging.debug(f"ROCm version file check failed: {e}")
-    
+
     return None
 
+
 def detect_gpu_vendor() -> tuple[bool, bool]:
     """Detect GPU vendor (NVIDIA or AMD/ROCm).
-    
+
     Returns:
         tuple[bool, bool]: (is_nvidia, is_rocm)
-        
+
     Raises:
         ValueError: If no GPU management tools are found.
     """
@@ -116,78 +122,96 @@ def detect_gpu_vendor() -> tuple[bool, bool]:
 
 def initialize_profiler_utils(is_nvidia: bool, is_rocm: bool) -> Any:
     """Initialize the appropriate profiler utility based on GPU vendor.
-    
+
     Args:
         is_nvidia: Whether NVIDIA GPU is detected.
         is_rocm: Whether AMD ROCm GPU is detected.
-        
+
     Returns:
         Any: The ProfUtils class for the detected GPU vendor.
-        
+
     Raises:
         ImportError: If the required profiler utility cannot be imported.
     """
     if is_nvidia:
         try:
             from pynvml_utils import ProfUtils
+
             return ProfUtils
         except ImportError as e:
             raise ImportError(f"Could not import pynvml_utils.py: {e}")
-    
+
     # ROCm path: choose between rocm-smi and amd-smi based on version
     rocm_version = get_rocm_version()
     use_amd_smi = False
-    
+
     logging.info(f"Detected ROCm version: {rocm_version}")
     logging.info(f"amd-smi available: {check_amd_smi_available()}")
-    
+
     if rocm_version is not None and rocm_version >= 6.4:
         # ROCm >= 6.4: prefer amd-smi if available
         if check_amd_smi_available():
             use_amd_smi = True
             logging.info(f"Using amd-smi for ROCm {rocm_version}")
         else:
-            logging.warning(f"ROCm {rocm_version} detected but amd-smi not available, using rocm-smi")
+            logging.warning(
+                f"ROCm {rocm_version} detected but amd-smi not available, using rocm-smi"
+            )
     else:
         logging.info(f"ROCm {rocm_version} < 6.4, using rocm-smi")
-    
+
     if use_amd_smi:
         try:
             from amd_smi_utils import ProfUtils
+
             logging.info("Successfully imported amd_smi_utils")
             return ProfUtils
         except ImportError as import_err:
             # Fallback to rocm-smi if amd-smi import fails
-            logging.warning(f"amd-smi import failed: {import_err}, falling back to rocm-smi")
+            logging.warning(
+                f"amd-smi import failed: {import_err}, falling back to rocm-smi"
+            )
             try:
                 from rocm_smi_utils import ProfUtils
+
                 return ProfUtils
             except ImportError as e:
-                raise ImportError(f"Could not import amd_smi_utils.py or rocm_smi_utils.py: {e}")
+                raise ImportError(
+                    f"Could not import amd_smi_utils.py or rocm_smi_utils.py: {e}"
+                )
         except Exception as init_err:
             # Catch initialization errors from amd_smi_utils.__init__
-            logging.warning(f"amd-smi initialization failed: {init_err}, falling back to rocm-smi")
+            logging.warning(
+                f"amd-smi initialization failed: {init_err}, falling back to rocm-smi"
+            )
             try:
                 from rocm_smi_utils import ProfUtils
+
                 return ProfUtils
             except ImportError as e:
-                raise ImportError(f"Could not import rocm_smi_utils.py after amd-smi init failed: {e}")
+                raise ImportError(
+                    f"Could not import rocm_smi_utils.py after amd-smi init failed: {e}"
+                )
     else:
         # ROCm < 6.4 or amd-smi not available: use rocm-smi
         try:
             from rocm_smi_utils import ProfUtils
+
             return ProfUtils
         except ImportError as e:
             raise ImportError(f"Could not import rocm_smi_utils.py: {e}")
 
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 
 # Module-level GPU detection (performed once at import, but deferred if used as library)
 IS_NVIDIA: bool = False
 IS_ROCM: bool = False
 _GPU_DETECTED: bool = False
 
+
 def _ensure_gpu_detected() -> None:
     """Ensure GPU vendor detection has been performed."""
     global IS_NVIDIA, IS_ROCM, _GPU_DETECTED
@@ -198,12 +222,12 @@ def _ensure_gpu_detected() -> None:
 
 def run_command(commandstring: str) -> None:
     """Run the command string.
-    
+
     This function runs the command string.
-    
+
     Args:
         commandstring (str): The command string to run.
-    
+
     Raises:
         subprocess.CalledProcessError: If the command fails.
     """
@@ -213,10 +237,10 @@ def run_command(commandstring: str) -> None:
 
 def run_command0(commandstring: str) -> None:
     """Run command on GPU device 0.
-    
+
     Args:
         commandstring: The command string to run.
-    
+
     Raises:
         subprocess.CalledProcessError: If the command fails.
     """
@@ -228,10 +252,10 @@ def run_command0(commandstring: str) -> None:
 
 def run_command1(commandstring: str) -> None:
     """Run command on GPU device 1.
-    
+
     Args:
         commandstring: The command string to run.
-    
+
     Raises:
         subprocess.CalledProcessError: If the command fails.
     """
@@ -243,15 +267,18 @@ def run_command1(commandstring: str) -> None:
 
 class EventController(threading.Thread):
     """Thread to control workload execution and synchronize with profilers.
-    
+
     Attributes:
         event: Threading event to signal profiler start/stop.
         commandstring: The command string to execute.
         dual_gcd: Whether to run workload on dual GCDs (AMD-specific).
     """
-    def __init__(self, event: threading.Event, commandstring: str, dual_gcd: str, profiler: Any) -> None:
+
+    def __init__(
+        self, event: threading.Event, commandstring: str, dual_gcd: str, profiler: Any
+    ) -> None:
         """Initialize the event controller.
-        
+
         Args:
             event: Threading event for synchronization.
             commandstring: Command to execute.
@@ -266,7 +293,7 @@ def __init__(self, event: threading.Event, commandstring: str, dual_gcd: str, pr
 
     def run(self) -> None:
         """Execute workload and control profiler lifecycle.
-        
+
         Raises:
             EnvironmentError: If dual GCD mode is requested but not available.
         """
@@ -275,20 +302,20 @@ def run(self) -> None:
         time.sleep(1)  # Allow profiler to initialize
 
         n_devices = len(self.profiler.list_devices())
-        
+
         # Dual GCD mode (AMD-specific)
         if IS_ROCM and n_devices == 2 and self.dual_gcd == "true":
             logging.info("Running workload on both GCDs")
             p0 = multiprocessing.Process(target=run_command0, args=[self.commandstring])
             p1 = multiprocessing.Process(target=run_command1, args=[self.commandstring])
-            
+
             logging.info("Workload starting...")
             p0.start()
             p1.start()
             p0.join()
             p1.join()
             logging.info("Workload completed")
-            
+
         elif IS_ROCM and n_devices != 2 and self.dual_gcd == "true":
             self.event.clear()
             raise EnvironmentError(
@@ -308,16 +335,19 @@ def run(self) -> None:
 
 class ProfilerThread(threading.Thread):
     """Base thread class for GPU profiling.
-    
+
     Attributes:
         data: List of profiling samples collected.
         devices: List of GPU device IDs to profile.
         sampling_rate: Time interval between samples (seconds).
         event: Threading event for synchronization with workload.
     """
-    def __init__(self, devices: List[int], sampling_rate: float, event: threading.Event) -> None:
+
+    def __init__(
+        self, devices: List[int], sampling_rate: float, event: threading.Event
+    ) -> None:
         """Initialize the profiler thread.
-        
+
         Args:
             devices: List of GPU device IDs to profile.
             sampling_rate: Sampling interval in seconds.
@@ -331,59 +361,68 @@ def __init__(self, devices: List[int], sampling_rate: float, event: threading.Ev
 
     def run(self, prof_fun: Any, header_string: str) -> None:
         """Execute profiling loop.
-        
+
         Args:
             prof_fun: Function to call for getting metric value for a device.
             header_string: Column header prefix for CSV output.
         """
         self.event.wait()  # Wait for workload to start
         logging.info("Profiler started")
-        
+
         while self.event.is_set():
             now = datetime.datetime.now()
             row: Dict[str, Any] = {"time": now.strftime("%Y-%m-%d %H:%M:%S.%f")}
-            
+
             for device_id in self.devices:
                 current_val = prof_fun(device_id)
                 row[f"{header_string}{device_id}"] = current_val
-            
+
             logging.debug(f"Sample: {row}")
             self.data.append(row)
             time.sleep(self.sampling_rate)
-        
+
         logging.info(f"Profiler stopped. Collected {len(self.data)} samples")
 
 
 class PowerProfiler(ProfilerThread):
     """Thread for profiling GPU power consumption.
-    
+
     Attributes:
         prof_fun: Function to get power metric.
         header_string: CSV column header prefix.
     """
-    def __init__(self, devices: List[int], sampling_rate: float, event: threading.Event, 
-                 profiler: Any, device_filter: str) -> None:
+
+    def __init__(
+        self,
+        devices: List[int],
+        sampling_rate: float,
+        event: threading.Event,
+        profiler: Any,
+        device_filter: str,
+    ) -> None:
         """Initialize the power profiler.
-        
+
         Args:
             devices: List of GPU device IDs to profile.
             sampling_rate: Sampling interval in seconds.
             event: Threading event for synchronization.
             profiler: GPU profiler utility instance.
             device_filter: Device filter string ("all" or specific device).
-        
+
         Raises:
             ValueError: If a specified device is a secondary die (AMD-specific).
         """
         super().__init__(devices, sampling_rate, event)
-        
+
         # AMD-specific: Filter out secondary dies
         if IS_ROCM and device_filter != "all":
             for device_id in self.devices:
                 if profiler.check_if_secondary_die(device_id):
                     raise ValueError(f"Device {device_id} is a secondary die")
         elif IS_ROCM and device_filter == "all":
-            self.devices = [d for d in self.devices if not profiler.check_if_secondary_die(d)]
+            self.devices = [
+                d for d in self.devices if not profiler.check_if_secondary_die(d)
+            ]
 
         self.prof_fun = profiler.get_power
         self.header_string = "Power(Watt) GPU"
@@ -395,15 +434,21 @@ def run(self) -> None:
 
 class VRAMProfiler(ProfilerThread):
     """Thread for profiling GPU VRAM/memory usage.
-    
+
     Attributes:
         prof_fun: Function to get memory metric.
         header_string: CSV column header prefix.
     """
-    def __init__(self, devices: List[int], sampling_rate: float, event: threading.Event, 
-                 profiler: Any) -> None:
+
+    def __init__(
+        self,
+        devices: List[int],
+        sampling_rate: float,
+        event: threading.Event,
+        profiler: Any,
+    ) -> None:
         """Initialize the VRAM profiler.
-        
+
         Args:
             devices: List of GPU device IDs to profile.
             sampling_rate: Sampling interval in seconds.
@@ -421,13 +466,13 @@ def run(self) -> None:
 
 def main() -> None:
     """Profile GPU usage during workload execution.
-    
+
     Reads configuration from environment variables:
         MODE: "power" or "vram"
         DEVICE: Comma-separated device IDs or "all"
         SAMPLING_RATE: Sampling interval in seconds
         DUAL_GCD: "true" to enable dual GCD mode (AMD-specific)
-    
+
     Raises:
         ValueError: If MODE is invalid or required env vars are missing.
         EnvironmentError: If dual GCD mode is incompatible with system.
@@ -441,13 +486,13 @@ def main() -> None:
             commandstring += f'"{arg}" '
         else:
             commandstring += f"{arg} "
-    
+
     # Get required environment variables
     mode = os.environ.get("MODE")
     device = os.environ.get("DEVICE")
     sampling_rate_str = os.environ.get("SAMPLING_RATE")
     dual_gcd = os.environ.get("DUAL_GCD", "false")
-    
+
     # Validate environment variables
     if not mode:
         raise ValueError("MODE environment variable is required")
@@ -455,15 +500,15 @@ def main() -> None:
         raise ValueError("DEVICE environment variable is required")
     if not sampling_rate_str:
         raise ValueError("SAMPLING_RATE environment variable is required")
-    
+
     try:
         sampling_rate = float(sampling_rate_str)
     except ValueError:
         raise ValueError(f"Invalid SAMPLING_RATE: {sampling_rate_str}")
-    
+
     if mode not in ["power", "vram"]:
         raise ValueError(f"Invalid MODE: {mode}. Must be 'power' or 'vram'")
-    
+
     # Initialize profiler utility
     prof_utils_class = initialize_profiler_utils(IS_NVIDIA, IS_ROCM)
     try:
@@ -475,62 +520,64 @@ def main() -> None:
             logging.warning("Attempting fallback to rocm-smi")
             try:
                 from rocm_smi_utils import ProfUtils as RocmSmiProfUtils
+
                 profiler = RocmSmiProfUtils(mode)
                 logging.info("Successfully fell back to rocm-smi")
             except Exception as fallback_err:
-                raise RuntimeError(f"Failed to initialize both amd-smi and rocm-smi: {e}, {fallback_err}")
+                raise RuntimeError(
+                    f"Failed to initialize both amd-smi and rocm-smi: {e}, {fallback_err}"
+                )
         else:
             raise
-    
+
     # Create synchronization event
     event = threading.Event()
 
     # Parse device list
     device_list = device.split(",")
-    
+
     if len(device_list) == 1 and device_list[0] == "all":
         device_list = profiler.list_devices()
     elif len(device_list) == 1 and device_list[0].isdigit():
         device_list = [int(device_list[0])]
     else:
         device_list = [int(d) for d in device_list]
-    
-    logging.info(f"Profiling mode: {mode}, devices: {device_list}, sampling rate: {sampling_rate}s")
+
+    logging.info(
+        f"Profiling mode: {mode}, devices: {device_list}, sampling rate: {sampling_rate}s"
+    )
 
     # Create threads
     workload_thread = EventController(
-        event=event,
-        commandstring=commandstring,
-        dual_gcd=dual_gcd,
-        profiler=profiler
+        event=event, commandstring=commandstring, dual_gcd=dual_gcd, profiler=profiler
     )
-    
+
     if mode == "power":
         profiler_thread = PowerProfiler(
             devices=device_list,
             sampling_rate=sampling_rate,
             event=event,
             profiler=profiler,
-            device_filter=device
+            device_filter=device,
         )
     else:  # mode == "vram"
         profiler_thread = VRAMProfiler(
             devices=device_list,
             sampling_rate=sampling_rate,
             event=event,
-            profiler=profiler
+            profiler=profiler,
         )
 
     # Global flag for signal handling
     shutdown_requested = threading.Event()
-    
+
     def signal_handler(signum, frame):
         """Handle SIGTERM/SIGINT to gracefully shutdown."""
         logging.info(f"Received signal {signum}, initiating graceful shutdown...")
         shutdown_requested.set()
         # Stop the profiler event to signal threads to stop
         event.clear()
-    
+
     # Register signal handlers
     signal.signal(signal.SIGTERM, signal_handler)
     signal.signal(signal.SIGINT, signal_handler)
@@ -538,23 +585,23 @@ def signal_handler(signum, frame):
     # Execute profiling
     workload_thread.start()
     profiler_thread.start()
-    
+
     # Wait for either workload completion or shutdown signal
     workload_thread.join(timeout=1)
     while workload_thread.is_alive() and not shutdown_requested.is_set():
         time.sleep(0.1)
-    
+
     # If shutdown was requested, clear event to stop profiler
     if shutdown_requested.is_set():
         event.clear()
         logging.info("Shutdown requested, stopping profiler thread...")
-    
+
     # Wait for profiler thread to finish
     profiler_thread.join(timeout=5)
-    
+
     # Write results to CSV
     output_file = os.environ.get("OUTPUT_FILE", "prof.csv")
-    
+
     if not profiler_thread.data:
         logging.warning("No profiling data collected")
         # Don't exit with error if we got a shutdown signal - this is expected
@@ -562,15 +609,17 @@ def signal_handler(signum, frame):
             sys.exit(1)
     else:
         try:
-            with open(output_file, "w", newline='') as csvfile:
-                writer = csv.DictWriter(csvfile, fieldnames=profiler_thread.data[0].keys())
+            with open(output_file, "w", newline="") as csvfile:
+                writer = csv.DictWriter(
+                    csvfile, fieldnames=profiler_thread.data[0].keys()
+                )
                 writer.writeheader()
                 writer.writerows(profiler_thread.data)
             logging.info(f"Profiling data written to {output_file}")
         except IOError as e:
             logging.error(f"Failed to write output file: {e}")
             sys.exit(1)
-    
+
 
 if __name__ == "__main__":
     main()
diff --git a/src/madengine/scripts/common/tools/pynvml_utils.py b/src/madengine/scripts/common/tools/pynvml_utils.py
index e1915895..8c81d941 100644
--- a/src/madengine/scripts/common/tools/pynvml_utils.py
+++ b/src/madengine/scripts/common/tools/pynvml_utils.py
@@ -6,10 +6,11 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+import logging
+
 # built-in modules
 import typing
-import logging
-from typing import Optional, List
+from typing import List, Optional
 
 # third-party modules
 import pynvml
@@ -17,7 +18,7 @@
 
 class ProfUtils:
     """Class to get GPU information using NVIDIA pynvml library.
-    
+
     Attributes:
         device_count: Number of NVIDIA GPUs detected.
         handles: List of NVML device handles.
@@ -26,10 +27,10 @@ class ProfUtils:
 
     def __init__(self, mode: str) -> None:
         """Initialize the NVIDIA profiler utility.
-        
+
         Args:
             mode: Mode parameter for API compatibility (not used for NVIDIA).
-            
+
         Raises:
             RuntimeError: If NVML initialization fails.
         """
@@ -37,15 +38,15 @@ def __init__(self, mode: str) -> None:
             pynvml.nvmlInit()
         except pynvml.NVMLError as e:
             raise RuntimeError(f"Failed to initialize NVML: {e}")
-        
+
         try:
             self.device_count = pynvml.nvmlDeviceGetCount()
         except pynvml.NVMLError as e:
             raise RuntimeError(f"Failed to get device count: {e}")
-        
+
         self.handles: List = []
         self.device_list: List[int] = []
-        
+
         for i in range(self.device_count):
             try:
                 self.device_list.append(i)
@@ -55,17 +56,17 @@ def __init__(self, mode: str) -> None:
 
     def get_power(self, device: int) -> str:
         """Get current power consumption of a GPU device.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Power consumption in watts as string, or 'N/A' if unavailable.
         """
         if device < 0 or device >= len(self.handles):
             logging.error(f"Invalid device index: {device}")
-            return 'N/A'
-        
+            return "N/A"
+
         try:
             # nvmlDeviceGetPowerUsage returns milliwatts
             power_mw = pynvml.nvmlDeviceGetPowerUsage(self.handles[device])
@@ -73,11 +74,11 @@ def get_power(self, device: int) -> str:
             return str(round(power_watts, 2))
         except pynvml.NVMLError as e:
             logging.debug(f"Failed to get power for device {device}: {e}")
-            return 'N/A'
+            return "N/A"
 
     def list_devices(self) -> List[int]:
         """Get list of available GPU device indices.
-        
+
         Returns:
             List of device indices.
         """
@@ -85,17 +86,17 @@ def list_devices(self) -> List[int]:
 
     def get_mem_info(self, device: int) -> float:
         """Get memory usage percentage for a GPU device.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Memory usage percentage as float (0-100).
         """
         if device < 0 or device >= len(self.handles):
             logging.error(f"Invalid device index: {device}")
             return 0.0
-        
+
         try:
             info = pynvml.nvmlDeviceGetMemoryInfo(self.handles[device])
             if info.total > 0:
@@ -108,13 +109,13 @@ def get_mem_info(self, device: int) -> float:
 
     def check_if_secondary_die(self, device: int) -> bool:
         """Check if device is a secondary die.
-        
+
         This method is provided for API compatibility with AMD utils.
         NVIDIA GPUs do not have the concept of secondary dies like AMD MCM GPUs.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Always False for NVIDIA GPUs.
         """
diff --git a/src/madengine/scripts/common/tools/rocm_smi_utils.py b/src/madengine/scripts/common/tools/rocm_smi_utils.py
index dd73219b..fdbf9190 100644
--- a/src/madengine/scripts/common/tools/rocm_smi_utils.py
+++ b/src/madengine/scripts/common/tools/rocm_smi_utils.py
@@ -6,8 +6,8 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
-import sys
 import logging
+import sys
 from typing import List
 
 sys.path.append("/opt/rocm/libexec/rocm_smi/")
@@ -20,11 +20,12 @@
 
 class ProfUtils:
     """Class to get GPU information using AMD rocm_smi utility.
-    
+
     Attributes:
         rocm6: Whether ROCm 6+ API is available.
         rocmsmi: ROCm SMI bindings instance.
     """
+
     def __init__(self, mode) -> None:
         self.rocm6 = False
         try:
@@ -32,10 +33,14 @@ def __init__(self, mode) -> None:
             if rocm_smi.driverInitialized() is True:
                 ret_init = self.rocmsmi.rsmi_init(0)
                 if ret_init != 0:
-                    raise ValueError('ROCm SMI returned %s (the expected value is 0)', ret_init)
+                    raise ValueError(
+                        "ROCm SMI returned %s (the expected value is 0)", ret_init
+                    )
                     exit(ret_init)
             else:
-                raise ImportError('Driver not initialized (amdgpu not found in modules)')
+                raise ImportError(
+                    "Driver not initialized (amdgpu not found in modules)"
+                )
                 exit(0)
             self.rocm6 = True
         except Exception:
@@ -43,32 +48,32 @@ def __init__(self, mode) -> None:
 
     def get_power(self, device: int) -> str:
         """Get current socket power of a given device.
-        
+
         Args:
             device: DRM device identifier.
-            
+
         Returns:
             Power consumption in watts as string, or 'N/A' if unavailable.
         """
         if self.rocm6:
             power = c_uint32()
             ret = self.rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power))
-            if rocm_smi.rsmi_ret_ok(ret, device, 'get_socket_power', False):
+            if rocm_smi.rsmi_ret_ok(ret, device, "get_socket_power", False):
                 return str(power.value / 1000000)
-            return 'N/A'
+            return "N/A"
         else:
             return rocm_smi.getPower(device)
 
     def list_devices(self) -> List[int]:
         """Get list of GPU device indices.
-        
+
         Returns:
             List of device indices.
         """
         if self.rocm6:
             numberOfDevices = c_uint32(0)
             ret = self.rocmsmi.rsmi_num_monitor_devices(byref(numberOfDevices))
-            if rocm_smi.rsmi_ret_ok(ret, metric='get_num_monitbyrefor_devices'):
+            if rocm_smi.rsmi_ret_ok(ret, metric="get_num_monitbyrefor_devices"):
                 deviceList = list(range(numberOfDevices.value))
                 return deviceList
             else:
@@ -78,31 +83,33 @@ def list_devices(self) -> List[int]:
 
     def get_mem_info(self, device: int) -> float:
         """Get memory usage percentage for a device.
-        
+
         Args:
             device: GPU device index.
-            
+
         Returns:
             Memory usage percentage as float.
         """
         if self.rocm6:
             memoryUse = c_uint64()
-            ret = self.rocmsmi.rsmi_dev_memory_busy_percent_get(device, byref(memoryUse))
-            if rocm_smi.rsmi_ret_ok(ret, device, '% memory use'):
+            ret = self.rocmsmi.rsmi_dev_memory_busy_percent_get(
+                device, byref(memoryUse)
+            )
+            if rocm_smi.rsmi_ret_ok(ret, device, "% memory use"):
                 return memoryUse.value
         else:
             (memUsed, memTotal) = rocm_smi.getMemInfo(device, "vram")
-            return round(float(memUsed)/float(memTotal) * 100, 2)
+            return round(float(memUsed) / float(memTotal) * 100, 2)
 
     def check_if_secondary_die(self, device: int) -> bool:
         """Check if GCD(die) is the secondary die in a MCM.
-        
+
         MI200 device specific feature check.
         The secondary dies lack power management features.
-        
+
         Args:
             device: The device to check.
-            
+
         Returns:
             True if secondary die, False otherwise.
         """
@@ -112,8 +119,14 @@ def check_if_secondary_die(self, device: int) -> bool:
             timestamp = c_uint64()
 
             # secondary die can be determined by checking if energy counter == 0
-            ret = self.rocmsmi.rsmi_dev_energy_count_get(device, byref(energy_count), byref(counter_resoution), byref(timestamp))
-            if (rocm_smi.rsmi_ret_ok(ret, None, 'energy_count_secondary_die_check', silent=False)) and (energy_count.value == 0):
+            ret = self.rocmsmi.rsmi_dev_energy_count_get(
+                device, byref(energy_count), byref(counter_resoution), byref(timestamp)
+            )
+            if (
+                rocm_smi.rsmi_ret_ok(
+                    ret, None, "energy_count_secondary_die_check", silent=False
+                )
+            ) and (energy_count.value == 0):
                 return True
             return False
         else:
diff --git a/src/madengine/scripts/common/tools/rocprof_counter_csv_to_instruction_histogram.py b/src/madengine/scripts/common/tools/rocprof_counter_csv_to_instruction_histogram.py
index 386bf08f..27c44f2b 100644
--- a/src/madengine/scripts/common/tools/rocprof_counter_csv_to_instruction_histogram.py
+++ b/src/madengine/scripts/common/tools/rocprof_counter_csv_to_instruction_histogram.py
@@ -66,13 +66,19 @@ def parse_csv(path: Path) -> dict:
 
 def main() -> int:
     if len(sys.argv) < 2:
-        print("Usage: rocprof_counter_csv_to_instruction_histogram.py <output_dir>", file=sys.stderr)
+        print(
+            "Usage: rocprof_counter_csv_to_instruction_histogram.py <output_dir>",
+            file=sys.stderr,
+        )
         return 1
     out_dir = Path(sys.argv[1])
     if not out_dir.is_dir():
         return 0  # no dir, skip silently
     aggregated = {}
-    for name in ("rocprofv3_output_counter_collection.csv", "rocprofv3_output_domain_stats.csv"):
+    for name in (
+        "rocprofv3_output_counter_collection.csv",
+        "rocprofv3_output_domain_stats.csv",
+    ):
         path = out_dir / name
         if not path.exists():
             continue
diff --git a/src/madengine/scripts/common/tools/rocprof_wrapper.sh b/src/madengine/scripts/common/tools/rocprof_wrapper.sh
index 995e5c79..755b87e0 100755
--- a/src/madengine/scripts/common/tools/rocprof_wrapper.sh
+++ b/src/madengine/scripts/common/tools/rocprof_wrapper.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 # ROCm Profiler Wrapper - Intelligently select between rocprof (legacy) and rocprofv3 (new)
 #
 # This wrapper handles the transition from rocprof to rocprofv3 across ROCm versions.
@@ -39,17 +39,17 @@
 get_rocm_version() {
     # Try multiple methods to detect ROCm version
     local version=""
-    
+
     # Method 1: Check rocm-smi output
     if command -v rocm-smi &> /dev/null; then
         version=$(rocm-smi --version 2>/dev/null | grep -oP 'ROCm version: \K[0-9]+\.[0-9]+\.[0-9]+' | head -1)
     fi
-    
+
     # Method 2: Check /opt/rocm/.info/version file
     if [ -z "$version" ] && [ -f /opt/rocm/.info/version ]; then
         version=$(cat /opt/rocm/.info/version)
     fi
-    
+
     # Method 3: Check ROCM_PATH or default ROCm installation
     if [ -z "$version" ]; then
         local rocm_path="${ROCM_PATH:-/opt/rocm}"
@@ -57,7 +57,7 @@ get_rocm_version() {
             version=$(cat "$rocm_path/.info/version")
         fi
     fi
-    
+
     echo "$version"
 }
 
@@ -72,13 +72,13 @@ version_gte() {
 # Function to detect available profiler
 detect_profiler() {
     local rocm_version=$(get_rocm_version)
-    
+
     # Check if rocprofv3 is available
     if command -v rocprofv3 &> /dev/null; then
         echo "rocprofv3"
         return 0
     fi
-    
+
     # Check if rocprof (legacy) is available
     if command -v rocprof &> /dev/null; then
         # For ROCm >= 7.0, warn that rocprofv3 should be available
@@ -88,7 +88,7 @@ detect_profiler() {
         echo "rocprof"
         return 0
     fi
-    
+
     # No profiler found
     echo "Error: Neither rocprofv3 nor rocprof found in PATH" >&2
     echo "Please ensure ROCm profiler tools are installed" >&2
@@ -99,11 +99,11 @@ detect_profiler() {
 main() {
     local profiler=$(detect_profiler)
     local exit_code=$?
-    
+
     if [ $exit_code -ne 0 ]; then
         return 1
     fi
-    
+
     # Execute the detected profiler with all passed arguments
     if [ "$profiler" = "rocprof" ]; then
         # Legacy rocprof syntax: rocprof [options] <app> [args]
@@ -117,21 +117,21 @@ main() {
         local profiler_opts=()
         local app_cmd=()
         local found_separator=false
-        
+
         for arg in "$@"; do
             if [ "$arg" = "--" ]; then
                 # Found the separator, everything after this is the application command
                 found_separator=true
                 continue
             fi
-            
+
             if [ "$found_separator" = true ]; then
                 app_cmd+=("$arg")
             else
                 profiler_opts+=("$arg")
             fi
         done
-        
+
         # Build command with proper argument placement.
         # Filter known-noisy rocprofv3/generateRocpd stderr: "sql text value for value is empty. Using NULL instead"
         # (ROCm writes this for every empty string->NULL in the SQLite DB; harmless but floods logs.)
@@ -148,4 +148,3 @@ main() {
 
 # Run main function
 main "$@"
-
diff --git a/src/madengine/scripts/common/tools/therock_detector.py b/src/madengine/scripts/common/tools/therock_detector.py
index 557ba55d..97dbf169 100755
--- a/src/madengine/scripts/common/tools/therock_detector.py
+++ b/src/madengine/scripts/common/tools/therock_detector.py
@@ -41,6 +41,7 @@ def _prepend_madengine_to_sys_path() -> None:
         therock_manifest_path,
     )
 except ImportError:  # pragma: no cover — script copied outside a package tree
+
     def therock_manifest_path(path: Path) -> Path:  # keep in sync with therock_markers
         return path / "share" / "therock" / "therock_manifest.json"
 
@@ -88,7 +89,7 @@ def _add_installation(self, install_type: str, path: Path, details: Dict):
             "path": str(path.resolve()),
             "details": details,
         }
-        
+
         # Avoid duplicates
         if not any(inst["path"] == installation["path"] for inst in self.installations):
             self.installations.append(installation)
@@ -97,7 +98,7 @@ def _add_installation(self, install_type: str, path: Path, details: Dict):
     def _is_therock_installation(self, path: Path) -> Optional[Dict]:
         """
         Check if a path contains TheRock installation markers.
-        
+
         Returns dict with installation details if TheRock is detected, None otherwise.
         """
         if not path.exists():
@@ -127,7 +128,9 @@ def _is_therock_installation(self, path: Path) -> Optional[Dict]:
                 with open(dist_info_path, "r") as f:
                     dist_info = json.load(f)
                     details["dist_info"] = {
-                        "amdgpu_targets": dist_info.get("dist_amdgpu_targets", "unknown"),
+                        "amdgpu_targets": dist_info.get(
+                            "dist_amdgpu_targets", "unknown"
+                        ),
                     }
             except Exception as e:
                 self.log(f"Error reading dist_info: {e}")
@@ -153,20 +156,20 @@ def _is_therock_installation(self, path: Path) -> Optional[Dict]:
         # If we found any TheRock markers, return details
         if details:
             return details
-        
+
         return None
 
     def _detect_rocm_sdk_command(self):
         """Detect rocm-sdk command in PATH (indicates pip installation)."""
         self.log("Checking for rocm-sdk command...")
-        
+
         rocm_sdk_path = shutil.which("rocm-sdk")
         if rocm_sdk_path:
             self.log(f"Found rocm-sdk at: {rocm_sdk_path}")
-            
+
             # Try to get installation details
             details = {"command_path": rocm_sdk_path}
-            
+
             # Get version
             try:
                 result = subprocess.run(
@@ -201,25 +204,26 @@ def _detect_rocm_sdk_command(self):
     def _detect_python_packages(self):
         """Detect TheRock Python packages in site-packages."""
         self.log("Checking Python site-packages...")
-        
+
         try:
             import site
             import importlib.util
-            
+
             # Check for rocm_sdk package
             spec = importlib.util.find_spec("rocm_sdk")
             if spec and spec.origin:
                 package_path = Path(spec.origin).parent
                 self.log(f"Found rocm_sdk package at: {package_path}")
-                
+
                 # Try to import and get details
                 try:
                     import rocm_sdk
+
                     details = {
                         "package_path": str(package_path),
                         "version": getattr(rocm_sdk, "__version__", "unknown"),
                     }
-                    
+
                     # Try to get rocm_sdk_core path for TheRock markers
                     core_spec = importlib.util.find_spec("_rocm_sdk_core")
                     if core_spec and core_spec.origin:
@@ -230,14 +234,14 @@ def _detect_python_packages(self):
                             self._add_installation("python_package", core_path, details)
                 except Exception as e:
                     self.log(f"Error importing rocm_sdk: {e}")
-                    
+
         except Exception as e:
             self.log(f"Error checking Python packages: {e}")
 
     def _detect_tarball_installations(self):
         """Detect tarball installations in common paths."""
         self.log("Checking common installation paths...")
-        
+
         # Common installation directories for tarballs
         common_paths = [
             Path.home() / "rocm",
@@ -246,7 +250,7 @@ def _detect_tarball_installations(self):
             Path("/usr/local/rocm"),
             Path.home() / ".local" / "rocm",
         ]
-        
+
         for path in common_paths:
             if path.exists():
                 details = self._is_therock_installation(path)
@@ -256,13 +260,13 @@ def _detect_tarball_installations(self):
     def _detect_from_env_vars(self):
         """Detect TheRock from environment variables."""
         self.log("Checking environment variables...")
-        
+
         env_vars = [
             "ROCM_PATH",
             "ROCM_HOME",
             "HIP_PATH",
         ]
-        
+
         for var in env_vars:
             value = os.environ.get(var)
             if value:
@@ -277,18 +281,20 @@ def _detect_from_env_vars(self):
     def _detect_build_directories(self):
         """Detect local TheRock build directories."""
         self.log("Checking for local build directories...")
-        
+
         # Check current directory and parent directories
         current = Path.cwd()
         for _ in range(5):  # Check up to 5 levels up
             # Check for TheRock source indicators
-            if (current / "CMakeLists.txt").exists() and (current / "version.json").exists():
+            if (current / "CMakeLists.txt").exists() and (
+                current / "version.json"
+            ).exists():
                 try:
                     with open(current / "version.json", "r") as f:
                         version_data = json.load(f)
                         if "rocm-version" in version_data:
                             self.log(f"Found TheRock source at: {current}")
-                            
+
                             # Check build directory
                             build_dir = current / "build"
                             if build_dir.exists():
@@ -296,14 +302,20 @@ def _detect_build_directories(self):
                                 if dist_dir.exists():
                                     for dist_subdir in dist_dir.iterdir():
                                         if dist_subdir.is_dir():
-                                            details = self._is_therock_installation(dist_subdir)
+                                            details = self._is_therock_installation(
+                                                dist_subdir
+                                            )
                                             if details:
                                                 details["source_path"] = str(current)
-                                                details["rocm_version"] = version_data.get("rocm-version")
-                                                self._add_installation("local_build", dist_subdir, details)
+                                                details["rocm_version"] = (
+                                                    version_data.get("rocm-version")
+                                                )
+                                                self._add_installation(
+                                                    "local_build", dist_subdir, details
+                                                )
                 except Exception as e:
                     self.log(f"Error checking build directory: {e}")
-            
+
             parent = current.parent
             if parent == current:
                 break
@@ -315,33 +327,33 @@ def format_installation_info(installation: Dict) -> str:
     lines = []
     lines.append(f"\nType: {installation['type']}")
     lines.append(f"Path: {installation['path']}")
-    
+
     details = installation.get("details", {})
-    
+
     if "version" in details:
         lines.append(f"Version: {details['version']}")
-    
+
     if "rocm_version" in details:
         lines.append(f"ROCm Version: {details['rocm_version']}")
-    
+
     if "manifest" in details:
         manifest = details["manifest"]
         lines.append(f"TheRock Commit: {manifest.get('commit', 'unknown')}")
         lines.append(f"Submodules: {manifest.get('submodules_count', 0)}")
-    
+
     if "dist_info" in details:
         dist_info = details["dist_info"]
         lines.append(f"GPU Targets: {dist_info.get('amdgpu_targets', 'unknown')}")
-    
+
     if "binaries" in details:
         lines.append(f"Compilers: {', '.join(details['binaries'])}")
-    
+
     if "command_path" in details:
         lines.append(f"Command: {details['command_path']}")
-    
+
     if "detected_via" in details:
         lines.append(f"Detected via: ${details['detected_via']}")
-    
+
     return "\n".join(lines)
 
 
@@ -358,7 +370,8 @@ def main():
         """,
     )
     parser.add_argument(
-        "-v", "--verbose",
+        "-v",
+        "--verbose",
         action="store_true",
         help="Enable verbose output",
     )
@@ -372,11 +385,11 @@ def main():
         type=Path,
         help="Check specific path for TheRock installation",
     )
-    
+
     args = parser.parse_args()
-    
+
     detector = TherockDetector(verbose=args.verbose)
-    
+
     # If specific path provided, check only that
     if args.path:
         details = detector._is_therock_installation(args.path)
@@ -393,14 +406,14 @@ def main():
     else:
         # Run full detection
         installations = detector.detect_all()
-    
+
     # Output results
     if not installations:
         print("No TheRock installations detected.")
         print("\nTheRock uses Python pip packages or tarballs, not apt.")
         print("See: https://github.com/ROCm/TheRock/blob/main/RELEASES.md")
         sys.exit(1)
-    
+
     if args.json:
         print(json.dumps(installations, indent=2))
     else:
@@ -408,9 +421,9 @@ def main():
         for i, installation in enumerate(installations, 1):
             print(f"\n{'=' * 60}")
             print(f"Installation #{i}")
-            print('=' * 60)
+            print("=" * 60)
             print(format_installation_info(installation))
-        
+
         print(f"\n{'=' * 60}")
         print("\nTheRock Installation Info:")
         print("- TheRock does NOT use apt/system packages")
@@ -418,10 +431,9 @@ def main():
         print("- Python packages install to venv site-packages")
         print("- Tarballs extract to custom directories")
         print("\nFor more info: https://github.com/ROCm/TheRock")
-    
+
     sys.exit(0)
 
 
 if __name__ == "__main__":
     main()
-
diff --git a/src/madengine/scripts/k8s/data/download_aws.sh b/src/madengine/scripts/k8s/data/download_aws.sh
index 79a705ff..0017ae38 100755
--- a/src/madengine/scripts/k8s/data/download_aws.sh
+++ b/src/madengine/scripts/k8s/data/download_aws.sh
@@ -60,4 +60,3 @@ echo "MAD_DATA_DOWNLOAD_DURATION=$DURATION" >> /tmp/mad_metrics.env
 echo "MAD_DATA_SIZE=$SIZE" >> /tmp/mad_metrics.env
 echo "MAD_DATA_PROVIDER_TYPE=aws" >> /tmp/mad_metrics.env
 echo "MAD_DATANAME=$DATANAME" >> /tmp/mad_metrics.env
-
diff --git a/src/madengine/scripts/k8s/data/download_local.sh b/src/madengine/scripts/k8s/data/download_local.sh
index 901af88c..27ad405c 100755
--- a/src/madengine/scripts/k8s/data/download_local.sh
+++ b/src/madengine/scripts/k8s/data/download_local.sh
@@ -41,4 +41,3 @@ echo "MAD_DATA_DOWNLOAD_DURATION=0" >> /tmp/mad_metrics.env
 echo "MAD_DATA_SIZE=$SIZE" >> /tmp/mad_metrics.env
 echo "MAD_DATA_PROVIDER_TYPE=local" >> /tmp/mad_metrics.env
 echo "MAD_DATANAME=$DATANAME" >> /tmp/mad_metrics.env
-
diff --git a/src/madengine/scripts/k8s/data/download_minio.sh b/src/madengine/scripts/k8s/data/download_minio.sh
index f0da3932..03d66d82 100755
--- a/src/madengine/scripts/k8s/data/download_minio.sh
+++ b/src/madengine/scripts/k8s/data/download_minio.sh
@@ -79,4 +79,3 @@ echo "MAD_DATA_DOWNLOAD_DURATION=$DURATION" >> /tmp/mad_metrics.env
 echo "MAD_DATA_SIZE=$SIZE" >> /tmp/mad_metrics.env
 echo "MAD_DATA_PROVIDER_TYPE=minio" >> /tmp/mad_metrics.env
 echo "MAD_DATANAME=$DATANAME" >> /tmp/mad_metrics.env
-
diff --git a/src/madengine/scripts/k8s/data/download_nas.sh b/src/madengine/scripts/k8s/data/download_nas.sh
index 45e062d8..5d3e23d8 100755
--- a/src/madengine/scripts/k8s/data/download_nas.sh
+++ b/src/madengine/scripts/k8s/data/download_nas.sh
@@ -24,14 +24,14 @@ NAS_PASS=${NAS_PASSWORD}
 # If credentials not in environment, try to read from credential.json
 if [ -z "$NAS_PASS" ] && [ -f "/workspace/credential.json" ]; then
     echo "Reading NAS credentials from credential.json..."
-    
+
     # Extract NAS node info (try first node or find by hostname)
     NAS_HOST=$(python3 -c "import json; f=open('/workspace/credential.json'); d=json.load(f); nodes=d.get('NAS_NODES', []); print(nodes[0].get('HOST', 'mlse-nas.amd.com') if nodes else 'mlse-nas.amd.com')" 2>/dev/null || echo "mlse-nas.amd.com")
-    
+
     NAS_PORT=$(python3 -c "import json; f=open('/workspace/credential.json'); d=json.load(f); nodes=d.get('NAS_NODES', []); print(nodes[0].get('PORT', '22') if nodes else '22')" 2>/dev/null || echo "22")
-    
+
     NAS_USER=$(python3 -c "import json; f=open('/workspace/credential.json'); d=json.load(f); nodes=d.get('NAS_NODES', []); print(nodes[0].get('USERNAME', 'datum') if nodes else 'datum')" 2>/dev/null || echo "datum")
-    
+
     NAS_PASS=$(python3 -c "import json; f=open('/workspace/credential.json'); d=json.load(f); nodes=d.get('NAS_NODES', []); print(nodes[0].get('PASSWORD', '') if nodes else '')" 2>/dev/null || echo "")
 fi
 
@@ -86,4 +86,3 @@ echo "MAD_DATA_DOWNLOAD_DURATION=$DURATION" >> /tmp/mad_metrics.env
 echo "MAD_DATA_SIZE=$SIZE" >> /tmp/mad_metrics.env
 echo "MAD_DATA_PROVIDER_TYPE=nas" >> /tmp/mad_metrics.env
 echo "MAD_DATANAME=$DATANAME" >> /tmp/mad_metrics.env
-
diff --git a/src/madengine/scripts/k8s/tools.json b/src/madengine/scripts/k8s/tools.json
index c7a3398e..874183f9 100644
--- a/src/madengine/scripts/k8s/tools.json
+++ b/src/madengine/scripts/k8s/tools.json
@@ -1,7 +1,7 @@
 {
   "_comment": "madengine K8s Tools Configuration",
   "_description": "Configuration for K8s-specific tools and data providers",
-  
+
   "data_providers": {
     "minio": {
       "script": "scripts/k8s/data/download_minio.sh",
@@ -31,7 +31,7 @@
       "env_vars": {}
     }
   },
-  
+
   "wrappers": {
     "gpu_profiler": {
       "script": "scripts/k8s/wrappers/run_profiler.sh",
@@ -48,7 +48,7 @@
       "env_vars": {}
     }
   },
-  
+
   "shared_tools": {
     "_note": "These tools from scripts/common/ work directly in K8s without wrappers",
     "tools": [
@@ -78,7 +78,7 @@
       }
     ]
   },
-  
+
   "pre_scripts": [
     {
       "name": "gpu_info_pre",
@@ -87,7 +87,7 @@
       "description": "Pre-execution GPU status check"
     }
   ],
-  
+
   "post_scripts": [
     {
       "name": "gpu_info_post",
@@ -97,4 +97,3 @@
     }
   ]
 }
-
diff --git a/src/madengine/scripts/k8s/wrappers/run_profiler.sh b/src/madengine/scripts/k8s/wrappers/run_profiler.sh
index 17bd125c..392e85ac 100755
--- a/src/madengine/scripts/k8s/wrappers/run_profiler.sh
+++ b/src/madengine/scripts/k8s/wrappers/run_profiler.sh
@@ -47,4 +47,3 @@ if [ -d "$OUTPUT_DIR" ]; then
     echo "Output files:"
     ls -lh $OUTPUT_DIR
 fi
-
diff --git a/src/madengine/scripts/k8s/wrappers/run_rocenv.sh b/src/madengine/scripts/k8s/wrappers/run_rocenv.sh
index c26ad9d5..d9324f66 100755
--- a/src/madengine/scripts/k8s/wrappers/run_rocenv.sh
+++ b/src/madengine/scripts/k8s/wrappers/run_rocenv.sh
@@ -57,4 +57,3 @@ echo "Results saved to: /workspace/$OUTPUT_NAME.csv"
 if [ -f "/workspace/$OUT_CSV" ]; then
     echo "CSV file size: $(du -h /workspace/$OUT_CSV | cut -f1)"
 fi
-
diff --git a/src/madengine/scripts/slurm/epilog.sh b/src/madengine/scripts/slurm/epilog.sh
index 6f7b68e2..ab707264 100644
--- a/src/madengine/scripts/slurm/epilog.sh
+++ b/src/madengine/scripts/slurm/epilog.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # SLURM Epilog Script for GPU Cleanup
-# 
+#
 # This script should be installed on SLURM compute nodes to ensure
 # GPU processes are properly cleaned up after each job.
 #
@@ -27,14 +27,14 @@ log_message "=== Epilog script starting ==="
 # Function to kill GPU processes
 cleanup_gpu_processes() {
     log_message "Checking for GPU processes..."
-    
+
     # Try AMD GPUs first
     if [ -x /opt/rocm/bin/amd-smi ]; then
         log_message "Detected AMD ROCm installation, checking for processes..."
-        
+
         # Get PIDs using amd-smi
         PIDS=$(amd-smi process 2>/dev/null | grep -v PID | awk '{print $1}' | grep -E '^[0-9]+$' | sort -u)
-        
+
         if [ ! -z "$PIDS" ]; then
             log_message "Found GPU processes to clean: $PIDS"
             for pid in $PIDS; do
@@ -47,7 +47,7 @@ cleanup_gpu_processes() {
         else
             log_message "No GPU processes found via amd-smi"
         fi
-        
+
         # Try fuser on GPU devices as backup
         for device in /dev/kfd /dev/dri/renderD*; do
             if [ -e "$device" ]; then
@@ -64,13 +64,13 @@ cleanup_gpu_processes() {
             fi
         done
     fi
-    
+
     # Try NVIDIA GPUs
     if [ -x /usr/bin/nvidia-smi ]; then
         log_message "Detected NVIDIA GPU installation, checking for processes..."
-        
+
         PIDS=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader 2>/dev/null | grep -E '^[0-9]+$')
-        
+
         if [ ! -z "$PIDS" ]; then
             log_message "Found NVIDIA GPU processes to clean: $PIDS"
             for pid in $PIDS; do
@@ -89,7 +89,7 @@ cleanup_gpu_processes() {
 # Function to kill Ray processes
 cleanup_ray_processes() {
     log_message "Cleaning up Ray processes..."
-    
+
     # Kill Ray worker processes
     RAY_PIDS=$(pgrep -f "ray::" 2>/dev/null || true)
     if [ ! -z "$RAY_PIDS" ]; then
@@ -99,7 +99,7 @@ cleanup_ray_processes() {
     else
         log_message "No Ray processes found"
     fi
-    
+
     # Kill vLLM worker processes
     VLLM_PIDS=$(pgrep -f "RayWorkerWrapper" 2>/dev/null || true)
     if [ ! -z "$VLLM_PIDS" ]; then
@@ -109,7 +109,7 @@ cleanup_ray_processes() {
     else
         log_message "No vLLM worker processes found"
     fi
-    
+
     # Kill any vllm processes
     VLLM_MAIN_PIDS=$(pgrep -f "vllm" 2>/dev/null || true)
     if [ ! -z "$VLLM_MAIN_PIDS" ]; then
@@ -123,7 +123,7 @@ cleanup_ray_processes() {
 cleanup_docker_containers() {
     if command -v docker &> /dev/null; then
         log_message "Checking for stale Docker containers..."
-        
+
         # Find containers that might be from madengine
         CONTAINERS=$(docker ps -q --filter "name=container_rocm" 2>/dev/null || true)
         if [ ! -z "$CONTAINERS" ]; then
@@ -142,13 +142,13 @@ cleanup_docker_containers() {
 # Function to reset GPU state
 reset_gpu_state() {
     log_message "Resetting GPU state..."
-    
+
     # AMD GPU reset
     if [ -x /opt/rocm/bin/rocm-smi ]; then
         log_message "Resetting AMD GPUs..."
         /opt/rocm/bin/rocm-smi --gpureset 2>/dev/null || log_message "GPU reset failed (may require reboot)"
     fi
-    
+
     # NVIDIA GPU reset (requires nvidia-smi)
     if [ -x /usr/bin/nvidia-smi ]; then
         log_message "Resetting NVIDIA GPUs..."
@@ -175,4 +175,3 @@ cleanup_gpu_processes
 log_message "=== Epilog script completed ==="
 
 exit 0
-
diff --git a/src/madengine/utils/__init__.py b/src/madengine/utils/__init__.py
index 8281537a..948d9796 100644
--- a/src/madengine/utils/__init__.py
+++ b/src/madengine/utils/__init__.py
@@ -4,8 +4,12 @@
 Utility modules for madengine including GPU configuration resolution and config parsing.
 """
 
-from .gpu_config import GPUConfigResolver, resolve_runtime_gpus
 from .config_parser import ConfigParser, get_config_parser
+from .gpu_config import GPUConfigResolver, resolve_runtime_gpus
 
-__all__ = ["GPUConfigResolver", "resolve_runtime_gpus", "ConfigParser", "get_config_parser"]
-
+__all__ = [
+    "GPUConfigResolver",
+    "resolve_runtime_gpus",
+    "ConfigParser",
+    "get_config_parser",
+]
diff --git a/src/madengine/utils/config_parser.py b/src/madengine/utils/config_parser.py
index 04e71f9c..585b013a 100644
--- a/src/madengine/utils/config_parser.py
+++ b/src/madengine/utils/config_parser.py
@@ -10,10 +10,10 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-import os
-import re
 import json
 import logging
+import os
+import re
 import typing
 from pathlib import Path
 
@@ -21,6 +21,7 @@
 
 try:
     import yaml
+
     YAML_AVAILABLE = True
 except ImportError:
     YAML_AVAILABLE = False
@@ -30,111 +31,109 @@
 
 class ConfigParser:
     """Parser for model configuration files.
-    
+
     This class handles parsing configuration files in various formats
     (CSV, JSON, YAML) that are referenced in model arguments.
-    
+
     Supports three usage patterns when run from MAD-internal CI:
     1. MAD-internal models: ./scripts/model/configs/
     2. MAD submodule: ./scripts/MAD/model/configs/
     3. MAD-private submodule: ./scripts/MAD-private/model/configs/
-    
+
     Also works when run standalone in MAD or MAD-private repos.
     """
-    
+
     # Known repository/submodule names to detect
-    KNOWN_REPOS = ['MAD', 'MAD-private', 'MAD-internal']
-    
+    KNOWN_REPOS = ["MAD", "MAD-private", "MAD-internal"]
+
     def __init__(self, scripts_base_dir: typing.Optional[str] = None):
         """Initialize ConfigParser.
-        
+
         Args:
-            scripts_base_dir: Base directory for scripts 
+            scripts_base_dir: Base directory for scripts
                              (e.g., "scripts/MAD-private/pyt_atom")
         """
         self.scripts_base_dir = scripts_base_dir
         self._path_cache = {}  # Cache resolved paths
-    
+
     def _extract_repo_root(self, path: str) -> typing.Optional[str]:
         """Extract repository root from a scripts path.
-        
+
         Examples:
             "scripts/MAD-private/pyt_atom" -> "scripts/MAD-private"
             "scripts/MAD/vllm" -> "scripts/MAD"
             "scripts/model" -> "scripts"
-            
+
         Args:
             path: Full or partial path containing scripts directory
-            
+
         Returns:
             Repository root path, or None if not identifiable
         """
         if not path:
             return None
-        
+
         parts = Path(path).parts
-        
+
         # Find 'scripts' in the path
         try:
-            scripts_idx = parts.index('scripts')
+            scripts_idx = parts.index("scripts")
         except ValueError:
             return None
-        
+
         # Check if next part after 'scripts' is a known repo name
         if scripts_idx + 1 < len(parts):
             next_part = parts[scripts_idx + 1]
             if next_part in self.KNOWN_REPOS:
                 # It's a submodule: scripts/MAD-private or scripts/MAD
-                return os.path.join(*parts[:scripts_idx + 2])
+                return os.path.join(*parts[: scripts_idx + 2])
             else:
                 # It's MAD-internal's own models: scripts/model -> scripts
-                return os.path.join(*parts[:scripts_idx + 1])
-        
+                return os.path.join(*parts[: scripts_idx + 1])
+
         # Just 'scripts' directory
-        return os.path.join(*parts[:scripts_idx + 1])
-    
+        return os.path.join(*parts[: scripts_idx + 1])
+
     def _build_candidate_paths(
-        self, 
-        config_path: str, 
-        model_scripts_path: str = None
+        self, config_path: str, model_scripts_path: str = None
     ) -> typing.List[str]:
         """Build list of candidate paths to try in priority order.
-        
+
         Args:
             config_path: Relative config path (e.g., "configs/default.csv")
             model_scripts_path: Path to model script file
-            
+
         Returns:
             List of full paths to try, in order of priority
         """
         candidates = []
-        
+
         # Priority 1: Relative to model's immediate directory
         # scripts/MAD-private/pyt_atom + configs/default.csv
         if model_scripts_path:
             scripts_dir = os.path.dirname(model_scripts_path)
             if scripts_dir:
                 candidates.append(os.path.join(scripts_dir, config_path))
-        
+
         # Priority 2: Relative to scripts_base_dir
         # scripts/MAD-private/pyt_atom + configs/default.csv
         if self.scripts_base_dir:
             candidates.append(os.path.join(self.scripts_base_dir, config_path))
-        
+
         # Priority 3: Relative to repository root (for shared configs)
         # This handles: scripts/MAD-private/pyt_atom -> scripts/MAD-private/configs/
         if self.scripts_base_dir:
             repo_root = self._extract_repo_root(self.scripts_base_dir)
             if repo_root:
                 candidates.append(os.path.join(repo_root, config_path))
-        
+
         if model_scripts_path:
             scripts_dir = os.path.dirname(model_scripts_path)
             if scripts_dir:
                 repo_root = self._extract_repo_root(scripts_dir)
                 if repo_root:
                     candidates.append(os.path.join(repo_root, config_path))
-        
+
         # Priority 4: Walk up from model's directory
         # Try parent directories up to repo root
         if model_scripts_path:
@@ -144,7 +143,7 @@ def _build_candidate_paths(
                 candidates.extend(
                     self._walk_up_between(config_path, scripts_dir, repo_root)
                 )
-        
+
         # Priority 5: Walk up from scripts_base_dir
         if self.scripts_base_dir:
             repo_root = self._extract_repo_root(self.scripts_base_dir)
@@ -152,7 +151,7 @@ def _build_candidate_paths(
                 candidates.extend(
                     self._walk_up_between(config_path, self.scripts_base_dir, repo_root)
                 )
-        
+
         # Remove duplicates while preserving order
         seen = set()
         unique_candidates = []
@@ -161,29 +160,26 @@ def _build_candidate_paths(
             if normalized not in seen:
                 seen.add(normalized)
                 unique_candidates.append(normalized)
-        
+
         return unique_candidates
-    
+
     def _walk_up_between(
-        self, 
-        config_path: str, 
-        start_dir: str, 
-        stop_dir: str
+        self, config_path: str, start_dir: str, stop_dir: str
     ) -> typing.List[str]:
         """Generate candidate paths by walking up from start to stop directory.
-        
+
         Args:
             config_path: Relative config path
             start_dir: Starting directory
             stop_dir: Stop at this directory (inclusive)
-            
+
         Returns:
             List of candidate paths
         """
         candidates = []
         current = os.path.abspath(start_dir)
         stop = os.path.abspath(stop_dir)
-        
+
         while current == stop or current.startswith(stop + os.sep):
             parent = os.path.dirname(current)
             if parent == current:  # Reached root
@@ -192,46 +188,44 @@ def _walk_up_between(
             candidates.append(os.path.join(current, config_path))
             if current == stop:  # Reached stop directory
                 break
-        
+
         return candidates
-    
+
     def parse_config_from_args(
-        self, 
-        args_string: str, 
-        model_scripts_path: str = None
+        self, args_string: str, model_scripts_path: str = None
     ) -> typing.Optional[str]:
         """Extract and resolve config file path from model arguments.
-        
+
         Resolution strategy:
         1. If absolute path -> verify it exists
         2. Try model's immediate directory
         3. Try scripts_base_dir
         4. Try repository root (scripts/MAD-private/, scripts/MAD/, scripts/)
         5. Walk up from model directory to repo root
-        
+
         This handles all cases:
         - MAD-internal models: scripts/model/configs/default.csv
         - MAD submodule: scripts/MAD/model/configs/default.csv
         - MAD-private submodule: scripts/MAD-private/model/configs/default.csv
         - Shared configs at repo level: scripts/MAD-private/configs/default.csv
-        
+
         Args:
             args_string: The args field from models.json
             model_scripts_path: Path to the model's script file (e.g., run.py)
-            
+
         Returns:
             Full path to config file, or None if not found
         """
         if not args_string:
             return None
-        
+
         # Look for --config argument
-        config_match = re.search(r'--config\s+([^\s]+)', args_string)
+        config_match = re.search(r"--config\s+([^\s]+)", args_string)
         if not config_match:
             return None
-        
+
         config_path = config_match.group(1)
-        
+
         # Check cache first
         cache_key = f"{config_path}::{model_scripts_path}::{self.scripts_base_dir}"
         if cache_key in self._path_cache:
@@ -240,7 +234,7 @@ def parse_config_from_args(
                 return cached_path
             else:
                 del self._path_cache[cache_key]
-        
+
         # Handle absolute paths
         if os.path.isabs(config_path):
             if os.path.exists(config_path):
@@ -249,17 +243,17 @@ def parse_config_from_args(
             else:
                 LOGGER.warning(f"Absolute config path does not exist: {config_path}")
                 return None
-        
+
         # Build and try candidate paths
         candidates = self._build_candidate_paths(config_path, model_scripts_path)
-        
+
         for candidate in candidates:
             LOGGER.debug(f"Trying config path: {candidate}")
             if os.path.exists(candidate):
                 LOGGER.info(f"Found config file at: {candidate}")
                 self._path_cache[cache_key] = candidate
                 return candidate
-        
+
         # Not found
         LOGGER.warning(
             f"Config file not found: {config_path}\n"
@@ -270,16 +264,15 @@ def parse_config_from_args(
             + (f"\n    ... and {len(candidates)-5} more" if len(candidates) > 5 else "")
         )
         return None
-    
+
     def load_config_file(
-        self, 
-        config_path: str
+        self, config_path: str
     ) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
         """Load and parse a configuration file.
-        
+
         Args:
             config_path: Full path to the config file
-            
+
         Returns:
             For CSV: List of dicts (one per row, excluding empty rows)
             For JSON/YAML: Dict or list as-is from file
@@ -287,15 +280,15 @@ def load_config_file(
         """
         if not config_path or not os.path.exists(config_path):
             return None
-        
+
         file_ext = Path(config_path).suffix.lower()
-        
+
         try:
-            if file_ext == '.csv':
+            if file_ext == ".csv":
                 return self._load_csv(config_path)
-            elif file_ext == '.json':
+            elif file_ext == ".json":
                 return self._load_json(config_path)
-            elif file_ext in ['.yaml', '.yml']:
+            elif file_ext in [".yaml", ".yml"]:
                 return self._load_yaml(config_path)
             else:
                 LOGGER.warning(f"Unsupported config file format: {file_ext}")
@@ -303,133 +296,137 @@ def load_config_file(
         except Exception as e:
             LOGGER.error(f"Error loading config file {config_path}: {e}")
             return None
-    
+
     def _load_csv(self, config_path: str) -> typing.List[dict]:
         """Load CSV config file.
-        
+
         Args:
             config_path: Path to CSV file
-            
+
         Returns:
             List of dicts, one per row (excluding completely empty rows)
         """
         df = pd.read_csv(config_path)
-        
+
         # Remove rows that are completely empty (all NaN)
         # This handles blank lines in CSV files
-        df = df.dropna(how='all')
-        
+        df = df.dropna(how="all")
+
         # Convert NaN to None for JSON serialization
         df = df.where(pd.notnull(df), None)
-        
+
         # Convert to list of dicts
-        configs = df.to_dict(orient='records')
-        
+        configs = df.to_dict(orient="records")
+
         LOGGER.info(f"Loaded {len(configs)} config entries from {config_path}")
-        
+
         return configs
-    
+
     def _load_json(self, config_path: str) -> typing.Union[dict, list]:
         """Load JSON config file.
-        
+
         Args:
             config_path: Path to JSON file
-            
+
         Returns:
             Dict or list from JSON file
         """
-        with open(config_path, 'r') as f:
+        with open(config_path, "r") as f:
             return json.load(f)
-    
+
     def _load_yaml(self, config_path: str) -> typing.Union[dict, list]:
         """Load YAML config file.
-        
+
         Args:
             config_path: Path to YAML file
-            
+
         Returns:
             Dict or list from YAML file
         """
         if not YAML_AVAILABLE:
             raise ImportError("PyYAML is not installed. Cannot load YAML config files.")
-        
-        with open(config_path, 'r') as f:
+
+        with open(config_path, "r") as f:
             return yaml.safe_load(f)
-    
+
     def match_config_to_result(
-        self, 
-        configs_list: typing.List[dict], 
-        result_data: dict, 
-        model_name: str
+        self, configs_list: typing.List[dict], result_data: dict, model_name: str
     ) -> typing.Optional[dict]:
         """Match a specific result to its corresponding config.
-        
+
         For CSV configs with multiple rows (like vllm), match based on
         model name and other identifiable fields.
-        
+
         Args:
             configs_list: List of config dicts (from CSV rows)
             result_data: Single result row data
             model_name: The model name from result
-            
+
         Returns:
             Matching config dict, or None if no match found
         """
         if not configs_list:
             return None
-        
+
         # For single config, return it
         if len(configs_list) == 1:
             return configs_list[0]
-        
+
         # For multiple configs, try to match based on common fields
         for config in configs_list:
             # Try to match on 'model' field if it exists in both
-            if 'model' in config and 'model' in result_data:
+            if "model" in config and "model" in result_data:
                 # Compare normalized versions
-                config_model = str(config['model']).replace('/', '_').replace('-', '_').lower()
-                result_model = str(result_data['model']).replace('/', '_').replace('-', '_').lower()
+                config_model = (
+                    str(config["model"]).replace("/", "_").replace("-", "_").lower()
+                )
+                result_model = (
+                    str(result_data["model"])
+                    .replace("/", "_")
+                    .replace("-", "_")
+                    .lower()
+                )
                 if config_model in result_model or result_model in config_model:
                     # Additional checks for benchmark type if available
-                    if 'benchmark' in config and 'benchmark' in result_data:
-                        if config['benchmark'] == result_data['benchmark']:
+                    if "benchmark" in config and "benchmark" in result_data:
+                        if config["benchmark"] == result_data["benchmark"]:
                             return config
                     else:
                         return config
-        
+
         # If no match found, return first config as fallback
-        LOGGER.warning(f"Could not match config for result: {model_name}. Using first config.")
+        LOGGER.warning(
+            f"Could not match config for result: {model_name}. Using first config."
+        )
         return configs_list[0]
-    
+
     def parse_and_load(
-        self, 
-        args_string: str, 
-        model_scripts_path: str = None
+        self, args_string: str, model_scripts_path: str = None
     ) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
         """Parse config path from args and load the config file.
-        
+
         Convenience method that combines parse_config_from_args and load_config_file.
-        
+
         Args:
             args_string: The args field from models.json
             model_scripts_path: Path to the model's script file
-            
+
         Returns:
             Config data (list of dicts for CSV, dict for JSON/YAML), or None
         """
         config_path = self.parse_config_from_args(args_string, model_scripts_path)
         if not config_path:
             return None
-        
+
         return self.load_config_file(config_path)
 
 
 def get_config_parser(scripts_base_dir: typing.Optional[str] = None) -> ConfigParser:
     """Factory function to create a ConfigParser instance.
-    
+
     Args:
         scripts_base_dir: Base directory for scripts
-        
+
     Returns:
         ConfigParser instance
     """
diff --git a/src/madengine/utils/discover_models.py b/src/madengine/utils/discover_models.py
index fe795e7b..6ae52b32 100644
--- a/src/madengine/utils/discover_models.py
+++ b/src/madengine/utils/discover_models.py
@@ -5,11 +5,12 @@
 
 # built-in modules
 import argparse
-import os
-import json
 import importlib.util
+import json
+import os
 import typing
-from dataclasses import dataclass, field, asdict
+from dataclasses import asdict, dataclass, field
+
 from rich.console import Console as RichConsole
 
 
@@ -73,51 +74,57 @@ def _setup_model_dir_if_needed(self) -> None:
         This copies docker/, scripts/, and config files (models.json, credential.json, data.json)
         from MODEL_DIR to the current working directory to support the model discovery process.
         This operation is safe for build-only (CPU) nodes as it only involves file operations.
-        
+
         MODEL_DIR defaults to "." (current directory) if not set.
         Only copies if MODEL_DIR points to a different directory than current working directory.
         """
         model_dir_env = os.environ.get("MODEL_DIR", ".")
-        
+
         # Get absolute paths to compare
         model_dir_abs = os.path.abspath(model_dir_env)
         cwd_abs = os.path.abspath(".")
-        
+
         # Only copy if MODEL_DIR points to a different directory (not current dir)
         if model_dir_abs != cwd_abs:
             import shlex
             import subprocess
             from pathlib import Path
 
-            self.rich_console.print(f"[bold cyan]📁 MODEL_DIR environment variable detected:[/bold cyan] [yellow]{model_dir_env}[/yellow]")
+            self.rich_console.print(
+                f"[bold cyan]📁 MODEL_DIR environment variable detected:[/bold cyan] [yellow]{model_dir_env}[/yellow]"
+            )
             print(f"Copying required files to current working directory: {cwd_abs}")
 
             try:
                 # Check if source directory exists
                 if not os.path.exists(model_dir_env):
-                    self.rich_console.print(f"[yellow]⚠️  Warning: MODEL_DIR path does not exist: {model_dir_env}[/yellow]")
+                    self.rich_console.print(
+                        f"[yellow]⚠️  Warning: MODEL_DIR path does not exist: {model_dir_env}[/yellow]"
+                    )
                     return
 
                 # Copy specific directories and files only (not everything with /*)
                 # This prevents copying unwanted subdirectories from MODEL_DIR
                 items_to_copy = []
-                
+
                 # Directories to copy
                 for subdir in ["docker", "scripts"]:
                     src_path = Path(model_dir_env) / subdir
                     if src_path.exists():
                         items_to_copy.append((src_path, subdir, "directory"))
-                
+
                 # Files to copy
                 for file in ["models.json", "credential.json", "data.json"]:
                     src_file = Path(model_dir_env) / file
                     if src_file.exists():
                         items_to_copy.append((src_file, file, "file"))
-                
+
                 if not items_to_copy:
-                    self.rich_console.print(f"[yellow]⚠️  No required files/directories found in MODEL_DIR[/yellow]")
+                    self.rich_console.print(
+                        f"[yellow]⚠️  No required files/directories found in MODEL_DIR[/yellow]"
+                    )
                     return
-                
+
                 # Copy each item
                 copied_count = 0
                 for src_path, item_name, item_type in items_to_copy:
@@ -127,7 +134,7 @@ def _setup_model_dir_if_needed(self) -> None:
                             cmd, shell=True, capture_output=True, text=True, check=True
                         )
                         copied_count += 1
-                        
+
                         if result.stdout:
                             # Show summary for directories, full output for files
                             if item_type == "directory":
@@ -135,21 +142,29 @@ def _setup_model_dir_if_needed(self) -> None:
                                 if len(lines) < 10:
                                     print(result.stdout)
                                 else:
-                                    print(f"  ✓ Copied {item_name}/ ({len(lines)} files)")
+                                    print(
+                                        f"  ✓ Copied {item_name}/ ({len(lines)} files)"
+                                    )
                             else:
                                 print(f"  ✓ Copied {item_name}")
                     except subprocess.CalledProcessError as e:
-                        self.rich_console.print(f"[yellow]⚠️  Warning: Failed to copy {item_name}: {e}[/yellow]")
+                        self.rich_console.print(
+                            f"[yellow]⚠️  Warning: Failed to copy {item_name}: {e}[/yellow]"
+                        )
                         if e.stderr:
                             print(f"    Error details: {e.stderr}")
                         # Continue with other items even if one fails
-                
+
                 if copied_count > 0:
-                    self.rich_console.print(f"[green]✅ Successfully copied {copied_count} item(s) from MODEL_DIR[/green]")
-                
+                    self.rich_console.print(
+                        f"[green]✅ Successfully copied {copied_count} item(s) from MODEL_DIR[/green]"
+                    )
+
                 print(f"Model dir: {model_dir_env} → current dir: {cwd_abs}")
             except Exception as e:
-                self.rich_console.print(f"[yellow]⚠️  Warning: Unexpected error copying MODEL_DIR: {e}[/yellow]")
+                self.rich_console.print(
+                    f"[yellow]⚠️  Warning: Unexpected error copying MODEL_DIR: {e}[/yellow]"
+                )
                 # Continue execution even if copy fails
 
     def discover_models(self) -> None:
@@ -179,7 +194,9 @@ def discover_models(self) -> None:
                 files = os.listdir(root)
 
                 if "models.json" in files and "get_models_json.py" in files:
-                    self.rich_console.print(f"[red]❌ Both models.json and get_models_json.py found in {root}.[/red]")
+                    self.rich_console.print(
+                        f"[red]❌ Both models.json and get_models_json.py found in {root}.[/red]"
+                    )
                     raise ValueError(
                         f"Both models.json and get_models_json.py found in {root}."
                     )
@@ -311,7 +328,9 @@ def select_models(self) -> None:
                             custom_model.update_model()
                             dirname = custom_model.name.split("/")[0]
                             custom_model.dockerfile = os.path.normpath(
-                                os.path.join("scripts", dirname, custom_model.dockerfile)
+                                os.path.join(
+                                    "scripts", dirname, custom_model.dockerfile
+                                )
                             )
                             custom_model.scripts = os.path.normpath(
                                 os.path.join("scripts", dirname, custom_model.scripts)
@@ -339,7 +358,9 @@ def select_models(self) -> None:
                             custom_model.update_model()
                             dirname = custom_model.name.split("/")[0]
                             custom_model.dockerfile = os.path.normpath(
-                                os.path.join("scripts", dirname, custom_model.dockerfile)
+                                os.path.join(
+                                    "scripts", dirname, custom_model.dockerfile
+                                )
                             )
                             custom_model.scripts = os.path.normpath(
                                 os.path.join("scripts", dirname, custom_model.scripts)
@@ -349,7 +370,9 @@ def select_models(self) -> None:
                             tag_models.append(model_dict)
 
                 if not tag_models:
-                    self.rich_console.print(f"[red]❌ No models found corresponding to the given tag: {tag}[/red]")
+                    self.rich_console.print(
+                        f"[red]❌ No models found corresponding to the given tag: {tag}[/red]"
+                    )
                     raise ValueError(
                         f"No models found corresponding to the given tag: {tag}"
                     )
@@ -359,11 +382,15 @@ def select_models(self) -> None:
     def print_models(self) -> None:
         if self.selected_models:
             # print selected models using parsed tags and adding backslash-separated extra args
-            self.rich_console.print(f"[bold green]📋 Selected Models ({len(self.selected_models)} models):[/bold green]")
+            self.rich_console.print(
+                f"[bold green]📋 Selected Models ({len(self.selected_models)} models):[/bold green]"
+            )
             print(json.dumps(self.selected_models, indent=4))
         else:
             # print list of all model names
-            self.rich_console.print(f"[bold cyan]📊 Available Models ({len(self.model_list)} total):[/bold cyan]")
+            self.rich_console.print(
+                f"[bold cyan]📊 Available Models ({len(self.model_list)} total):[/bold cyan]"
+            )
             for model_name in self.model_list:
                 print(f"  {model_name}")
 
diff --git a/src/madengine/utils/gpu_config.py b/src/madengine/utils/gpu_config.py
index 4b3c4143..50029b64 100644
--- a/src/madengine/utils/gpu_config.py
+++ b/src/madengine/utils/gpu_config.py
@@ -16,7 +16,7 @@
 
 import logging
 import warnings
-from typing import Dict, Any, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 logger = logging.getLogger(__name__)
 
@@ -24,20 +24,20 @@
 class GPUConfigResolver:
     """
     Resolves GPU count from multiple configuration sources with clear precedence.
-    
+
     Handles various field names (n_gpus, gpu_count, gpus_per_node) and provides
     validation to catch configuration mismatches early.
     """
-    
+
     # All recognized field names for GPU count
     GPU_FIELD_ALIASES = [
         "gpus_per_node",  # SLURM, preferred standard
-        "gpu_count",      # Kubernetes
-        "n_gpus",         # Legacy model.json
-        "num_gpus",       # Alternative
-        "ngpus",          # Alternative
+        "gpu_count",  # Kubernetes
+        "n_gpus",  # Legacy model.json
+        "num_gpus",  # Alternative
+        "ngpus",  # Alternative
     ]
-    
+
     @classmethod
     def resolve_gpu_count(
         cls,
@@ -48,16 +48,16 @@ def resolve_gpu_count(
     ) -> Tuple[int, str]:
         """
         Resolve GPU count from multiple sources with clear precedence.
-        
+
         Args:
             model_info: Model configuration from models.json
             deployment_config: Deployment configuration (slurm/k8s section)
             runtime_override: Runtime override from --additional-context
             validate: Whether to validate and warn about mismatches
-        
+
         Returns:
             Tuple of (gpu_count, source) where source indicates which config was used
-            
+
         Examples:
             >>> # Priority 1: Runtime override
             >>> count, source = GPUConfigResolver.resolve_gpu_count(
@@ -67,7 +67,7 @@ def resolve_gpu_count(
             ... )
             >>> count, source
             (4, 'runtime_override')
-            
+
             >>> # Priority 2: Deployment config
             >>> count, source = GPUConfigResolver.resolve_gpu_count(
             ...     model_info={"n_gpus": "1"},
@@ -75,7 +75,7 @@ def resolve_gpu_count(
             ... )
             >>> count, source
             (8, 'deployment_config.slurm.gpus_per_node')
-            
+
             >>> # Priority 3: Model definition
             >>> count, source = GPUConfigResolver.resolve_gpu_count(
             ...     model_info={"n_gpus": "2"}
@@ -84,7 +84,7 @@ def resolve_gpu_count(
             (2, 'model_info.n_gpus')
         """
         sources = []  # Track all sources for validation
-        
+
         # Priority 1: Runtime override
         if runtime_override:
             gpu_count = cls._extract_gpu_count(runtime_override, "runtime_override")
@@ -93,31 +93,36 @@ def resolve_gpu_count(
                 if validate:
                     cls._validate_consistency(sources, model_info, deployment_config)
                 return gpu_count, "runtime_override"
-        
+
         # Priority 2: Deployment-specific config
         if deployment_config:
             # Check for SLURM config
             if "slurm" in deployment_config:
                 gpu_count = cls._extract_gpu_count(
-                    deployment_config["slurm"], 
-                    "deployment_config.slurm"
+                    deployment_config["slurm"], "deployment_config.slurm"
                 )
                 if gpu_count is not None:
                     sources.append(("deployment_config.slurm.gpus_per_node", gpu_count))
                     if validate:
-                        cls._validate_consistency(sources, model_info, deployment_config)
+                        cls._validate_consistency(
+                            sources, model_info, deployment_config
+                        )
                     return gpu_count, "deployment_config.slurm.gpus_per_node"
-            
+
             # Check for K8s config
             if "k8s" in deployment_config or "kubernetes" in deployment_config:
-                k8s_config = deployment_config.get("k8s") or deployment_config.get("kubernetes")
+                k8s_config = deployment_config.get("k8s") or deployment_config.get(
+                    "kubernetes"
+                )
                 gpu_count = cls._extract_gpu_count(k8s_config, "deployment_config.k8s")
                 if gpu_count is not None:
                     sources.append(("deployment_config.k8s.gpu_count", gpu_count))
                     if validate:
-                        cls._validate_consistency(sources, model_info, deployment_config)
+                        cls._validate_consistency(
+                            sources, model_info, deployment_config
+                        )
                     return gpu_count, "deployment_config.k8s.gpu_count"
-        
+
         # Priority 3: Model definition
         if model_info:
             gpu_count = cls._extract_gpu_count(model_info, "model_info")
@@ -126,55 +131,55 @@ def resolve_gpu_count(
                 if validate:
                     cls._validate_consistency(sources, model_info, deployment_config)
                 return gpu_count, "model_info.n_gpus"
-        
+
         # Priority 4: Default
         return 1, "default"
-    
+
     @classmethod
-    def _extract_gpu_count(
-        cls, 
-        config: Dict[str, Any], 
-        context: str
-    ) -> Optional[int]:
+    def _extract_gpu_count(cls, config: Dict[str, Any], context: str) -> Optional[int]:
         """
         Extract GPU count from config dict, trying all known field names.
-        
+
         Args:
             config: Configuration dictionary
             context: Context string for warning messages
-            
+
         Returns:
             GPU count as integer, or None if not found
         """
         if not config:
             return None
-        
+
         found_fields = []
         for field_name in cls.GPU_FIELD_ALIASES:
             if field_name in config:
                 found_fields.append((field_name, config[field_name]))
-        
+
         if not found_fields:
             return None
-        
+
         # Warn if multiple GPU fields found
         if len(found_fields) > 1:
             field_list = ", ".join([f"{name}={val}" for name, val in found_fields])
             logger.warning(
                 "Multiple GPU fields in %s: %s. Using %s=%s",
-                context, field_list, found_fields[0][0], found_fields[0][1],
+                context,
+                field_list,
+                found_fields[0][0],
+                found_fields[0][1],
             )
-        
+
         # Convert to int (handle string values like "8")
         try:
             return int(found_fields[0][1])
         except (ValueError, TypeError):
             logger.warning(
                 "Invalid GPU count in %s: %s. Using default.",
-                context, found_fields[0][1],
+                context,
+                found_fields[0][1],
             )
             return None
-    
+
     @classmethod
     def _validate_consistency(
         cls,
@@ -184,9 +189,9 @@ def _validate_consistency(
     ) -> None:
         """
         Validate consistency between different GPU count sources.
-        
+
         Warns if there are mismatches that might indicate configuration errors.
-        
+
         Args:
             sources: List of (source_name, gpu_count) tuples found so far
             model_info: Model configuration for additional checks
@@ -194,50 +199,51 @@ def _validate_consistency(
         """
         if not sources:
             return
-        
+
         # Collect all GPU counts from all sources
         all_counts = {}
-        
+
         # Add already resolved source
         for source_name, count in sources:
             all_counts[source_name] = count
-        
+
         # Check model_info
         if model_info:
             model_gpu = cls._extract_gpu_count(model_info, "model_info")
             if model_gpu is not None:
                 all_counts["model_info.n_gpus"] = model_gpu
-        
+
         # Check deployment config
         if deployment_config:
             if "slurm" in deployment_config:
-                slurm_gpu = cls._extract_gpu_count(
-                    deployment_config["slurm"], "slurm"
-                )
+                slurm_gpu = cls._extract_gpu_count(deployment_config["slurm"], "slurm")
                 if slurm_gpu is not None:
                     all_counts["deployment_config.slurm.gpus_per_node"] = slurm_gpu
-            
+
             if "k8s" in deployment_config or "kubernetes" in deployment_config:
-                k8s_config = deployment_config.get("k8s") or deployment_config.get("kubernetes")
+                k8s_config = deployment_config.get("k8s") or deployment_config.get(
+                    "kubernetes"
+                )
                 k8s_gpu = cls._extract_gpu_count(k8s_config, "k8s")
                 if k8s_gpu is not None:
                     all_counts["deployment_config.k8s.gpu_count"] = k8s_gpu
-        
+
         # Check for mismatches
         unique_counts = set(all_counts.values())
         if len(unique_counts) > 1:
             mismatch_details = ", ".join([f"{k}={v}" for k, v in all_counts.items()])
             # Determine if this is likely intentional (deployment override) or an error
-            is_deployment_override = (
-                sources[0][0].startswith("runtime_override") or
-                sources[0][0].startswith("deployment_config")
-            )
-            
+            is_deployment_override = sources[0][0].startswith(
+                "runtime_override"
+            ) or sources[0][0].startswith("deployment_config")
+
             if is_deployment_override:
                 # This is normal - deployment config overriding model default
                 logger.info(
                     "GPU configuration override: %s=%s (overriding model default: %s)",
-                    sources[0][0], sources[0][1], mismatch_details.split(",")[-1].strip(),
+                    sources[0][0],
+                    sources[0][1],
+                    mismatch_details.split(",")[-1].strip(),
                 )
             else:
                 # Potentially unexpected mismatch - use warning for actual errors
@@ -246,7 +252,7 @@ def _validate_consistency(
                     f"   Using: {sources[0][0]}={sources[0][1]}\n"
                     f"   Precedence: runtime_override > deployment_config > model_info > default",
                     UserWarning,
-                    stacklevel=4
+                    stacklevel=4,
                 )
 
 
@@ -256,16 +262,16 @@ def resolve_runtime_gpus(
 ) -> int:
     """
     Convenience function for resolving GPU count at runtime.
-    
+
     This is the main entry point for runtime GPU resolution.
-    
+
     Args:
         model_info: Model configuration from manifest
         additional_context: Additional context from CLI or config files
-        
+
     Returns:
         Resolved GPU count as integer
-        
+
     Example:
         >>> model_info = {"name": "my_model", "n_gpus": "1"}
         >>> additional_context = {"slurm": {"gpus_per_node": 8}}
@@ -275,18 +281,20 @@ def resolve_runtime_gpus(
     """
     # Extract deployment config from additional_context
     deployment_config = additional_context.get("deployment_config", {})
-    
+
     # Also check for direct slurm/k8s keys in additional_context
     if "slurm" in additional_context:
         if not deployment_config:
             deployment_config = {}
         deployment_config["slurm"] = additional_context["slurm"]
-    
+
     if "k8s" in additional_context or "kubernetes" in additional_context:
         if not deployment_config:
             deployment_config = {}
-        deployment_config["k8s"] = additional_context.get("k8s") or additional_context.get("kubernetes")
-    
+        deployment_config["k8s"] = additional_context.get(
+            "k8s"
+        ) or additional_context.get("kubernetes")
+
     # Check for direct runtime GPU override (in additional_context or deployment_config)
     runtime_override = None
     for field in GPUConfigResolver.GPU_FIELD_ALIASES:
@@ -297,15 +305,14 @@ def resolve_runtime_gpus(
         if deployment_config and field in deployment_config:
             runtime_override = {field: deployment_config[field]}
             break
-    
+
     gpu_count, source = GPUConfigResolver.resolve_gpu_count(
         model_info=model_info,
         deployment_config=deployment_config,
         runtime_override=runtime_override,
         validate=True,
     )
-    
+
     logger.info("Resolved GPU count: %s (from %s)", gpu_count, source)
-    
-    return gpu_count
 
+    return gpu_count
diff --git a/src/madengine/utils/gpu_tool_factory.py b/src/madengine/utils/gpu_tool_factory.py
index b3a0b566..300a3cbb 100644
--- a/src/madengine/utils/gpu_tool_factory.py
+++ b/src/madengine/utils/gpu_tool_factory.py
@@ -36,18 +36,18 @@ def get_gpu_tool_manager(
 
     Returns:
         GPU tool manager instance for the specified vendor
-        
+
     Raises:
         ValueError: If vendor is unknown or unsupported
         ImportError: If vendor-specific manager module cannot be imported
-        
+
     Example:
         >>> from madengine.utils.gpu_tool_factory import get_gpu_tool_manager
         >>> from madengine.utils.gpu_validator import GPUVendor
-        >>> 
+        >>>
         >>> # Auto-detect vendor
         >>> manager = get_gpu_tool_manager()
-        >>> 
+        >>>
         >>> # Explicit vendor
         >>> amd_manager = get_gpu_tool_manager(GPUVendor.AMD)
         >>> nvidia_manager = get_gpu_tool_manager(GPUVendor.NVIDIA)
@@ -69,6 +69,7 @@ def get_gpu_tool_manager(
     if vendor == GPUVendor.AMD:
         try:
             from madengine.utils.rocm_tool_manager import ROCmToolManager
+
             manager = ROCmToolManager(rocm_path=rocm_path)
             logger.info(f"Created new ROCm tool manager")
         except ImportError as e:
@@ -77,21 +78,22 @@ def get_gpu_tool_manager(
     elif vendor == GPUVendor.NVIDIA:
         try:
             from madengine.utils.nvidia_tool_manager import NvidiaToolManager
+
             manager = NvidiaToolManager()
             logger.info(f"Created new NVIDIA tool manager")
         except ImportError as e:
             raise ImportError(f"Failed to import NVIDIA tool manager: {e}")
-            
+
     elif vendor == GPUVendor.UNKNOWN:
         raise ValueError(
             "Unable to detect GPU vendor. Ensure GPU drivers and tools are installed.\n"
             "For AMD: Install ROCm (https://github.com/ROCm/ROCm)\n"
             "For NVIDIA: Install CUDA toolkit"
         )
-        
+
     else:
         raise ValueError(f"Unsupported GPU vendor: {vendor.value}")
-    
+
     # Cache the manager instance
     _manager_instances[cache_key] = manager
 
@@ -100,18 +102,18 @@ def get_gpu_tool_manager(
 
 def clear_manager_cache() -> None:
     """Clear all cached manager instances.
-    
+
     Useful for testing or when GPU configuration changes during runtime.
     This will force recreation of managers on next call to get_gpu_tool_manager().
-    
+
     Also clears internal caches within each manager before removing them.
     """
     global _manager_instances
-    
+
     # Clear caches within managers before removing them
     for manager in _manager_instances.values():
         manager.clear_cache()
-    
+
     _manager_instances.clear()
     logger.debug("Cleared all GPU tool manager instances")
 
@@ -126,4 +128,3 @@ def get_cached_managers() -> Dict[tuple, BaseGPUToolManager]:
         Dictionary mapping (vendor, rocm_path) to manager instances
     """
     return _manager_instances.copy()
-
diff --git a/src/madengine/utils/gpu_tool_manager.py b/src/madengine/utils/gpu_tool_manager.py
index 701e1db7..1eff8205 100644
--- a/src/madengine/utils/gpu_tool_manager.py
+++ b/src/madengine/utils/gpu_tool_manager.py
@@ -20,175 +20,164 @@
 
 class BaseGPUToolManager(ABC):
     """Abstract base class for GPU vendor-specific tool managers.
-    
+
     Provides common infrastructure for:
     - Tool availability checking
     - Command execution with timeout
     - Result caching
     - Consistent logging
-    
+
     Subclasses implement vendor-specific logic for:
     - Version detection
     - Tool selection
     - Command execution with fallback
     """
-    
+
     def __init__(self):
         """Initialize base GPU tool manager."""
         self._cache: Dict[str, Any] = {}
         self._cache_lock = threading.Lock()
-        
+
     @abstractmethod
     def get_version(self) -> Optional[str]:
         """Get GPU vendor tool version (e.g., ROCm version, CUDA version).
-        
+
         Returns:
             Version string or None if unable to detect
         """
         pass
-    
+
     @abstractmethod
     def execute_command(
-        self,
-        command: str,
-        fallback_command: Optional[str] = None,
-        timeout: int = 30
+        self, command: str, fallback_command: Optional[str] = None, timeout: int = 30
     ) -> str:
         """Execute command with optional fallback.
-        
+
         Args:
             command: Primary command to execute
             fallback_command: Optional fallback command if primary fails
             timeout: Command timeout in seconds
-            
+
         Returns:
             Command output as string
-            
+
         Raises:
             RuntimeError: If both primary and fallback commands fail
         """
         pass
-    
+
     def is_tool_available(self, tool_path: str) -> bool:
         """Check if a tool exists and is executable.
-        
+
         Args:
             tool_path: Path to the tool (e.g., /opt/rocm/bin/amd-smi)
-            
+
         Returns:
             True if tool exists and is executable, False otherwise
         """
         cache_key = f"tool_available:{tool_path}"
-        
+
         # Check cache first
         with self._cache_lock:
             if cache_key in self._cache:
                 return self._cache[cache_key]
-        
+
         # Check if file exists and is executable
         result = os.path.isfile(tool_path) and os.access(tool_path, os.X_OK)
-        
+
         # Cache the result
         with self._cache_lock:
             self._cache[cache_key] = result
-        
+
         return result
-    
+
     def _execute_shell_command(
-        self,
-        command: str,
-        timeout: int = 30,
-        check_returncode: bool = True
+        self, command: str, timeout: int = 30, check_returncode: bool = True
     ) -> Tuple[bool, str, str]:
         """Execute a shell command and return result.
-        
+
         Args:
             command: Shell command to execute
             timeout: Timeout in seconds
             check_returncode: If True, only succeed on returncode 0
-            
+
         Returns:
             Tuple of (success, stdout, stderr)
         """
         try:
             result = subprocess.run(
-                command,
-                shell=True,
-                capture_output=True,
-                text=True,
-                timeout=timeout
+                command, shell=True, capture_output=True, text=True, timeout=timeout
             )
-            
+
             success = (result.returncode == 0) if check_returncode else True
             return success, result.stdout.strip(), result.stderr.strip()
-            
+
         except subprocess.TimeoutExpired:
             return False, "", f"Command timed out after {timeout} seconds"
         except FileNotFoundError:
             return False, "", f"Command not found: {command.split()[0]}"
         except Exception as e:
             return False, "", f"Command execution error: {str(e)}"
-    
+
     def _cache_result(self, key: str, value: Any) -> None:
         """Cache a result for future use.
-        
+
         Args:
             key: Cache key
             value: Value to cache
         """
         with self._cache_lock:
             self._cache[key] = value
-    
+
     def _get_cached_result(self, key: str) -> Optional[Any]:
         """Get a cached result.
-        
+
         Args:
             key: Cache key
-            
+
         Returns:
             Cached value or None if not found
         """
         with self._cache_lock:
             return self._cache.get(key)
-    
+
     def _log_debug(self, message: str) -> None:
         """Log a debug message.
-        
+
         Args:
             message: Debug message
         """
         logger.debug(f"[{self.__class__.__name__}] {message}")
-    
+
     def _log_info(self, message: str) -> None:
         """Log an info message.
-        
+
         Args:
             message: Info message
         """
         logger.info(f"[{self.__class__.__name__}] {message}")
-    
+
     def _log_warning(self, message: str) -> None:
         """Log a warning message.
-        
+
         Args:
             message: Warning message
         """
         logger.warning(f"[{self.__class__.__name__}] {message}")
-    
+
     def _log_error(self, message: str) -> None:
         """Log an error message.
-        
+
         Args:
             message: Error message
         """
         logger.error(f"[{self.__class__.__name__}] {message}")
-    
+
     def clear_cache(self) -> None:
         """Clear all cached results.
-        
+
         Useful for testing or when tools are installed/updated during runtime.
         """
         with self._cache_lock:
             self._cache.clear()
         self._log_debug("Cache cleared")
-
diff --git a/src/madengine/utils/gpu_validator.py b/src/madengine/utils/gpu_validator.py
index 8429891e..d68fdc39 100644
--- a/src/madengine/utils/gpu_validator.py
+++ b/src/madengine/utils/gpu_validator.py
@@ -8,17 +8,18 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-import subprocess
 import os
-from typing import List, Tuple, Optional
+import subprocess
 from dataclasses import dataclass
 from enum import Enum
+from typing import List, Optional, Tuple
 
 from madengine.core.constants import get_rocm_path
 
 
 class GPUVendor(Enum):
     """Supported GPU vendors"""
+
     AMD = "AMD"
     NVIDIA = "NVIDIA"
     UNKNOWN = "UNKNOWN"
@@ -27,13 +28,14 @@ class GPUVendor(Enum):
 @dataclass
 class GPUValidationResult:
     """Result of GPU validation check"""
+
     is_valid: bool
     vendor: GPUVendor
     version: Optional[str] = None  # ROCm version or CUDA version
     issues: List[str] = None
     warnings: List[str] = None
     suggestions: List[str] = None
-    
+
     def __post_init__(self):
         if self.issues is None:
             self.issues = []
@@ -48,8 +50,8 @@ class ROCmValidator:
 
     # KFD (Kernel Fusion Driver) paths - not under ROCm install
     KFD_PATHS = {
-        'kfd_device': '/dev/kfd',
-        'kfd_topology': '/sys/devices/virtual/kfd/kfd/topology/nodes',
+        "kfd_device": "/dev/kfd",
+        "kfd_topology": "/sys/devices/virtual/kfd/kfd/topology/nodes",
     }
 
     def __init__(self, verbose: bool = False, rocm_path: Optional[str] = None):
@@ -62,32 +64,29 @@ def __init__(self, verbose: bool = False, rocm_path: Optional[str] = None):
         self.verbose = verbose
         self.rocm_path = get_rocm_path(rocm_path)
         self.ESSENTIAL_PATHS = {
-            'rocm_root': self.rocm_path,
-            'hip_path': os.path.join(self.rocm_path, 'bin', 'hipconfig'),
-            'rocminfo': os.path.join(self.rocm_path, 'bin', 'rocminfo'),
+            "rocm_root": self.rocm_path,
+            "hip_path": os.path.join(self.rocm_path, "bin", "hipconfig"),
+            "rocminfo": os.path.join(self.rocm_path, "bin", "rocminfo"),
         }
         self.RECOMMENDED_PATHS = {
-            'amd_smi': os.path.join(self.rocm_path, 'bin', 'amd-smi'),
-            'rocm_smi': os.path.join(self.rocm_path, 'bin', 'rocm-smi'),
+            "amd_smi": os.path.join(self.rocm_path, "bin", "amd-smi"),
+            "rocm_smi": os.path.join(self.rocm_path, "bin", "rocm-smi"),
         }
         self._tool_manager = None  # Lazy initialization
-        
+
     def _run_command(self, cmd: List[str], timeout: int = 10) -> Tuple[bool, str, str]:
         """Run a command and return success status and output
-        
+
         Args:
             cmd: Command to run as list of strings
             timeout: Timeout in seconds
-            
+
         Returns:
             Tuple of (success, stdout, stderr)
         """
         try:
             result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=timeout
+                cmd, capture_output=True, text=True, timeout=timeout
             )
             return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
         except subprocess.TimeoutExpired:
@@ -96,33 +95,34 @@ def _run_command(self, cmd: List[str], timeout: int = 10) -> Tuple[bool, str, st
             return False, "", f"Command not found: {cmd[0]}"
         except Exception as e:
             return False, "", str(e)
-    
+
     def _check_path_exists(self, path: str) -> bool:
         """Check if a path exists"""
         return os.path.exists(path)
-    
+
     def _get_tool_manager(self):
         """Get or create ROCm tool manager instance
-        
+
         Returns:
             ROCmToolManager instance
         """
         if self._tool_manager is None:
             try:
                 from madengine.utils.rocm_tool_manager import ROCmToolManager
+
                 self._tool_manager = ROCmToolManager(rocm_path=self.rocm_path)
             except ImportError as e:
                 if self.verbose:
                     print(f"Warning: Could not import ROCmToolManager: {e}")
                 return None
         return self._tool_manager
-    
+
     def _get_rocm_version(self) -> Optional[str]:
         """Get ROCm version from system using tool manager
-        
+
         Returns:
             ROCm version string or None if not found
-            
+
         Enhancement:
             Uses ROCmToolManager for robust multi-method version detection.
         """
@@ -133,30 +133,30 @@ def _get_rocm_version(self) -> Optional[str]:
                 return tool_manager.get_version()
             except Exception:
                 pass  # Fallback to direct methods
-        
+
         # Fallback: Try hipconfig first
-        success, stdout, _ = self._run_command(['hipconfig', '--version'])
+        success, stdout, _ = self._run_command(["hipconfig", "--version"])
         if success and stdout:
-            return stdout.split('-')[0]  # Remove build suffix
-        
+            return stdout.split("-")[0]  # Remove build suffix
+
         # Try version file
-        version_file = os.path.join(self.rocm_path, '.info', 'version')
+        version_file = os.path.join(self.rocm_path, ".info", "version")
         if os.path.exists(version_file):
             try:
-                with open(version_file, 'r') as f:
-                    version = f.read().strip().split('-')[0]
+                with open(version_file, "r") as f:
+                    version = f.read().strip().split("-")[0]
                     return version
             except Exception:
                 pass
-        
+
         return None
-    
+
     def _check_gpu_accessible(self) -> Tuple[bool, str]:
         """Check if GPUs are accessible using version-aware tool selection
-        
+
         Returns:
             Tuple of (accessible, message)
-            
+
         Enhancement:
             Uses tool manager to prefer correct tool based on ROCm version (PR #54).
         """
@@ -168,73 +168,80 @@ def _check_gpu_accessible(self) -> Tuple[bool, str]:
                 if count > 0:
                     version = tool_manager.get_rocm_version()
                     preferred_tool = tool_manager.get_preferred_smi_tool()
-                    return True, f"GPUs accessible via tool manager ({preferred_tool}, ROCm {version})"
+                    return (
+                        True,
+                        f"GPUs accessible via tool manager ({preferred_tool}, ROCm {version})",
+                    )
             except Exception:
                 pass  # Fall back to direct checks
-        
+
         # Fallback: Try rocminfo first (most reliable for detection)
-        success, stdout, stderr = self._run_command(['rocminfo'])
+        success, stdout, stderr = self._run_command(["rocminfo"])
         if success:
             # Check if any GPU agents are listed
-            if 'Agent' in stdout and 'gfx' in stdout.lower():
+            if "Agent" in stdout and "gfx" in stdout.lower():
                 return True, "GPUs accessible via rocminfo"
             else:
                 return False, "rocminfo ran but no GPU agents detected"
-        
+
         # Try amd-smi
-        success, stdout, stderr = self._run_command(['amd-smi', 'list'])
+        success, stdout, stderr = self._run_command(["amd-smi", "list"])
         if success and stdout:
             return True, "GPUs accessible via amd-smi"
-        
+
         # Try rocm-smi
-        success, stdout, stderr = self._run_command(['rocm-smi'])
+        success, stdout, stderr = self._run_command(["rocm-smi"])
         if success and stdout:
             return True, "GPUs accessible via rocm-smi"
-        
+
         return False, "No GPU management tool could detect GPUs"
-    
+
     def _check_kfd_driver(self) -> Tuple[bool, List[str], List[str]]:
         """Check if KFD driver is loaded
-        
+
         Returns:
             Tuple of (loaded, critical_issues, warnings)
         """
         critical_issues = []
         warnings = []
-        
+
         # Check /dev/kfd - this is critical
-        if not self._check_path_exists('/dev/kfd'):
-            critical_issues.append("/dev/kfd device not found - KFD driver may not be loaded")
-        
+        if not self._check_path_exists("/dev/kfd"):
+            critical_issues.append(
+                "/dev/kfd device not found - KFD driver may not be loaded"
+            )
+
         # Check KFD topology - this is critical
-        if not self._check_path_exists('/sys/devices/virtual/kfd/kfd/topology/nodes'):
-            critical_issues.append("KFD topology not found - GPU topology may not be available")
-        
+        if not self._check_path_exists("/sys/devices/virtual/kfd/kfd/topology/nodes"):
+            critical_issues.append(
+                "KFD topology not found - GPU topology may not be available"
+            )
+
         # Check dmesg for amdgpu module - this is just a warning if other checks pass
-        success, stdout, _ = self._run_command(['dmesg'], timeout=5)
+        success, stdout, _ = self._run_command(["dmesg"], timeout=5)
         if success:
-            if 'amdgpu' not in stdout.lower():
+            if "amdgpu" not in stdout.lower():
                 warnings.append("amdgpu driver messages not found in dmesg")
-        
+
         return len(critical_issues) == 0, critical_issues, warnings
-    
+
     def validate(self) -> GPUValidationResult:
         """Perform comprehensive ROCm validation
-        
+
         Returns:
             GPUValidationResult with validation results
         """
         result = GPUValidationResult(is_valid=True, vendor=GPUVendor.AMD)
-        
+
         if self.verbose:
             print("=" * 70)
             print("ROCm Installation Validation")
             print("=" * 70)
-        
+
         # 1. Check essential paths
         if self.verbose:
             print("\n[1/6] Checking essential ROCm paths...")
-        
+
         for name, path in self.ESSENTIAL_PATHS.items():
             if not self._check_path_exists(path):
                 result.is_valid = False
@@ -244,11 +251,11 @@ def validate(self) -> GPUValidationResult:
             else:
                 if self.verbose:
                     print(f"  ✓ {name}: Found at {path}")
-        
+
         # 2. Get ROCm version
         if self.verbose:
             print("\n[2/6] Detecting ROCm version...")
-        
+
         version = self._get_rocm_version()
         if version:
             result.version = version
@@ -259,11 +266,11 @@ def validate(self) -> GPUValidationResult:
             result.issues.append("Unable to detect ROCm version")
             if self.verbose:
                 print(f"  ✗ ROCm version: NOT DETECTED")
-        
+
         # 3. Check recommended tools
         if self.verbose:
             print("\n[3/6] Checking recommended ROCm tools...")
-        
+
         has_smi = False
         for name, path in self.RECOMMENDED_PATHS.items():
             if self._check_path_exists(path):
@@ -273,21 +280,21 @@ def validate(self) -> GPUValidationResult:
             else:
                 if self.verbose:
                     print(f"  ⚠ {name}: NOT FOUND at {path}")
-        
+
         if not has_smi:
             result.warnings.append("No GPU management tool (amd-smi/rocm-smi) found")
             result.suggestions.append("Install ROCm SMI tools for GPU monitoring")
-        
+
         # 4. Check KFD driver
         if self.verbose:
             print("\n[4/6] Checking KFD driver...")
-        
+
         kfd_ok, kfd_critical_issues, kfd_warnings = self._check_kfd_driver()
-        
+
         # 5. Check GPU accessibility
         if self.verbose:
             print("\n[5/6] Checking GPU accessibility...")
-        
+
         gpu_accessible, gpu_msg = self._check_gpu_accessible()
         if gpu_accessible:
             if self.verbose:
@@ -297,7 +304,7 @@ def validate(self) -> GPUValidationResult:
             result.issues.append(gpu_msg)
             if self.verbose:
                 print(f"  ✗ {gpu_msg}")
-        
+
         # Now decide how to handle KFD issues based on GPU accessibility
         # If GPUs are accessible, treat KFD dmesg warnings as non-critical
         if not kfd_ok:
@@ -327,25 +334,31 @@ def validate(self) -> GPUValidationResult:
                 print(f"  ✓ KFD driver loaded")
                 for warning in kfd_warnings:
                     print(f"  ⚠ {warning}")
-        
+
         # 6. Check permissions
         if self.verbose:
             print("\n[6/6] Checking permissions...")
-        
-        if os.path.exists('/dev/kfd'):
+
+        if os.path.exists("/dev/kfd"):
             try:
                 # Try to access /dev/kfd
-                if os.access('/dev/kfd', os.R_OK | os.W_OK):
+                if os.access("/dev/kfd", os.R_OK | os.W_OK):
                     if self.verbose:
                         print(f"  ✓ /dev/kfd is accessible")
                 else:
-                    result.warnings.append("Current user may not have permission to access /dev/kfd")
-                    result.suggestions.append("Add user to 'video' or 'render' group: sudo usermod -aG video,render $USER")
+                    result.warnings.append(
+                        "Current user may not have permission to access /dev/kfd"
+                    )
+                    result.suggestions.append(
+                        "Add user to 'video' or 'render' group: sudo usermod -aG video,render $USER"
+                    )
                     if self.verbose:
-                        print(f"  ⚠ /dev/kfd exists but may not be accessible by current user")
+                        print(
+                            f"  ⚠ /dev/kfd exists but may not be accessible by current user"
+                        )
             except Exception as e:
                 result.warnings.append(f"Unable to check /dev/kfd permissions: {e}")
-        
+
         # Generate suggestions based on issues
         if result.issues:
             if not self._check_path_exists(self.rocm_path):
@@ -354,7 +367,7 @@ def validate(self) -> GPUValidationResult:
                     "Set ROCM_PATH if using a non-default install, or install ROCm: "
                     "https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
                 )
-            
+
             if "KFD driver" in str(result.issues):
                 result.suggestions.append(
                     "Load amdgpu kernel module: sudo modprobe amdgpu"
@@ -362,7 +375,7 @@ def validate(self) -> GPUValidationResult:
                 result.suggestions.append(
                     "Reboot the system after ROCm installation to ensure kernel drivers are loaded"
                 )
-        
+
         # Print summary
         if self.verbose:
             print("\n" + "=" * 70)
@@ -372,87 +385,88 @@ def validate(self) -> GPUValidationResult:
                 print("✓ ROCm installation is VALID")
             else:
                 print("✗ ROCm installation has ISSUES")
-            
+
             if result.version:
                 print(f"\nROCm Version: {result.version}")
-            
+
             if result.issues:
                 print(f"\nIssues Found ({len(result.issues)}):")
                 for i, issue in enumerate(result.issues, 1):
                     print(f"  {i}. {issue}")
-            
+
             if result.warnings:
                 print(f"\nWarnings ({len(result.warnings)}):")
                 for i, warning in enumerate(result.warnings, 1):
                     print(f"  {i}. {warning}")
-            
+
             if result.suggestions:
                 print(f"\nSuggestions ({len(result.suggestions)}):")
                 for i, suggestion in enumerate(result.suggestions, 1):
                     print(f"  {i}. {suggestion}")
-            
+
             print("=" * 70)
-        
+
         return result
-    
+
     def get_error_message(self, result: GPUValidationResult) -> str:
         """Generate a detailed error message from validation result
-        
+
         Args:
             result: ROCmValidationResult from validate()
-            
+
         Returns:
             Formatted error message string
         """
         if result.is_valid:
             return ""
-        
+
         lines = ["ROCm installation validation FAILED:"]
         lines.append("")
-        
+
         if result.issues:
             lines.append("Critical Issues:")
             for issue in result.issues:
                 lines.append(f"  - {issue}")
             lines.append("")
-        
+
         if result.warnings:
             lines.append("Warnings:")
             for warning in result.warnings:
                 lines.append(f"  - {warning}")
             lines.append("")
-        
+
         if result.suggestions:
             lines.append("Suggested Actions:")
             for suggestion in result.suggestions:
                 lines.append(f"  • {suggestion}")
             lines.append("")
-        
-        lines.append("Please ensure ROCm is properly installed before running madengine.")
-        lines.append("Installation guide: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html")
-        
+
+        lines.append(
+            "Please ensure ROCm is properly installed before running madengine."
+        )
+        lines.append(
+            "Installation guide: https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html"
+        )
+
         return "\n".join(lines)
 
 
 class NVIDIAValidator:
     """Validator for NVIDIA CUDA installation"""
-    
+
     def __init__(self, verbose: bool = False):
         """Initialize NVIDIA validator
-        
+
         Args:
             verbose: If True, print detailed validation progress
         """
         self.verbose = verbose
-    
+
     def _run_command(self, cmd: List[str], timeout: int = 10) -> Tuple[bool, str, str]:
         """Run a command and return success status and output"""
         try:
             result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=timeout
+                cmd, capture_output=True, text=True, timeout=timeout
             )
             return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
         except subprocess.TimeoutExpired:
@@ -461,43 +475,46 @@ def _run_command(self, cmd: List[str], timeout: int = 10) -> Tuple[bool, str, st
             return False, "", f"Command not found: {cmd[0]}"
         except Exception as e:
             return False, "", str(e)
-    
+
     def _get_cuda_version(self) -> Optional[str]:
         """Get CUDA version from nvidia-smi or nvcc"""
         # Try nvidia-smi first
-        success, stdout, _ = self._run_command(['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'])
+        success, stdout, _ = self._run_command(
+            ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"]
+        )
         if success and stdout:
-            return stdout.split('\n')[0].strip()
-        
+            return stdout.split("\n")[0].strip()
+
         # Try nvcc as fallback
-        success, stdout, _ = self._run_command(['nvcc', '--version'])
-        if success and 'release' in stdout.lower():
+        success, stdout, _ = self._run_command(["nvcc", "--version"])
+        if success and "release" in stdout.lower():
             # Extract version from output like "release 11.8, V11.8.89"
             import re
-            match = re.search(r'release (\d+\.\d+)', stdout)
+
+            match = re.search(r"release (\d+\.\d+)", stdout)
             if match:
                 return match.group(1)
-        
+
         return None
-    
+
     def validate(self) -> GPUValidationResult:
         """Perform NVIDIA CUDA validation
-        
+
         Returns:
             GPUValidationResult with validation results
         """
         result = GPUValidationResult(is_valid=True, vendor=GPUVendor.NVIDIA)
-        
+
         if self.verbose:
             print("=" * 70)
             print("NVIDIA GPU (CUDA) Validation")
             print("=" * 70)
             print()
-        
+
         # 1. Check nvidia-smi
         if self.verbose:
             print("[1/4] Checking nvidia-smi availability...")
-        
+
         if not os.path.exists("/usr/bin/nvidia-smi"):
             result.is_valid = False
             result.issues.append("nvidia-smi not found at /usr/bin/nvidia-smi")
@@ -506,12 +523,12 @@ def validate(self) -> GPUValidationResult:
         else:
             if self.verbose:
                 print("  ✓ nvidia-smi: Found")
-        
+
         # 2. Test nvidia-smi execution
         if self.verbose:
             print("\n[2/4] Testing nvidia-smi execution...")
-        
-        success, stdout, stderr = self._run_command(['nvidia-smi', '--list-gpus'])
+
+        success, stdout, stderr = self._run_command(["nvidia-smi", "--list-gpus"])
         if not success:
             result.is_valid = False
             result.issues.append(f"nvidia-smi failed to execute: {stderr}")
@@ -520,11 +537,11 @@ def validate(self) -> GPUValidationResult:
         else:
             if self.verbose:
                 print("  ✓ nvidia-smi executed successfully")
-        
+
         # 3. Get CUDA version
         if self.verbose:
             print("\n[3/4] Detecting CUDA version...")
-        
+
         version = self._get_cuda_version()
         if version:
             result.version = version
@@ -534,25 +551,25 @@ def validate(self) -> GPUValidationResult:
             result.warnings.append("Unable to detect CUDA version")
             if self.verbose:
                 print("  ⚠ Could not detect CUDA version")
-        
+
         # 4. Count GPUs
         if self.verbose:
             print("\n[4/4] Counting available GPUs...")
-        
-        success, stdout, _ = self._run_command(['nvidia-smi', '--list-gpus'])
+
+        success, stdout, _ = self._run_command(["nvidia-smi", "--list-gpus"])
         if success and stdout:
-            gpu_count = len([line for line in stdout.split('\n') if line.strip()])
+            gpu_count = len([line for line in stdout.split("\n") if line.strip()])
             if gpu_count > 0:
                 if self.verbose:
                     print(f"  ✓ Found {gpu_count} GPU(s)")
-                    for line in stdout.split('\n'):
+                    for line in stdout.split("\n"):
                         if line.strip():
                             print(f"     - {line.strip()}")
             else:
                 result.warnings.append("No GPUs detected")
                 if self.verbose:
                     print("  ⚠ No GPUs detected")
-        
+
         # Generate suggestions
         if result.issues:
             if "nvidia-smi not found" in str(result.issues):
@@ -561,9 +578,11 @@ def validate(self) -> GPUValidationResult:
                     "https://developer.nvidia.com/cuda-downloads"
                 )
             if "failed to execute" in str(result.issues):
-                result.suggestions.append("Check if NVIDIA drivers are properly loaded: lsmod | grep nvidia")
+                result.suggestions.append(
+                    "Check if NVIDIA drivers are properly loaded: lsmod | grep nvidia"
+                )
                 result.suggestions.append("Try reinstalling NVIDIA drivers")
-        
+
         if self.verbose:
             print("\n" + "=" * 70)
             print("NVIDIA Validation Summary")
@@ -572,27 +591,27 @@ def validate(self) -> GPUValidationResult:
                 print("✓ NVIDIA GPU installation is VALID")
             else:
                 print("✗ NVIDIA GPU installation has ISSUES")
-            
+
             if result.version:
                 print(f"\nDriver/CUDA Version: {result.version}")
-            
+
             if result.issues:
                 print(f"\nIssues Found ({len(result.issues)}):")
                 for i, issue in enumerate(result.issues, 1):
                     print(f"  {i}. {issue}")
-            
+
             if result.warnings:
                 print(f"\nWarnings ({len(result.warnings)}):")
                 for i, warning in enumerate(result.warnings, 1):
                     print(f"  {i}. {warning}")
-            
+
             if result.suggestions:
                 print(f"\nSuggestions ({len(result.suggestions)}):")
                 for i, suggestion in enumerate(result.suggestions, 1):
                     print(f"  {i}. {suggestion}")
-            
+
             print("=" * 70)
-        
+
         return result
 
 
@@ -608,7 +627,9 @@ def detect_gpu_vendor(rocm_path: Optional[str] = None) -> GPUVendor:
     if os.path.exists("/usr/bin/nvidia-smi"):
         return GPUVendor.NVIDIA
     rocm = get_rocm_path(rocm_path)
-    if os.path.exists(os.path.join(rocm, "bin", "rocm-smi")) or os.path.exists(os.path.join(rocm, "bin", "amd-smi")):
+    if os.path.exists(os.path.join(rocm, "bin", "rocm-smi")) or os.path.exists(
+        os.path.join(rocm, "bin", "amd-smi")
+    ):
         return GPUVendor.AMD
     if os.path.exists("/usr/local/bin/amd-smi"):
         return GPUVendor.AMD
@@ -648,7 +669,7 @@ def validate_gpu_installation(
             version=rocm_result.version,
             issues=rocm_result.issues,
             warnings=rocm_result.warnings,
-            suggestions=rocm_result.suggestions
+            suggestions=rocm_result.suggestions,
         )
     elif vendor == GPUVendor.NVIDIA:
         validator = NVIDIAValidator(verbose=verbose)
@@ -656,63 +677,67 @@ def validate_gpu_installation(
     else:
         result = GPUValidationResult(is_valid=False, vendor=GPUVendor.UNKNOWN)
         result.issues.append("No GPU vendor detected")
-        result.suggestions.append("Install NVIDIA drivers (https://developer.nvidia.com/cuda-downloads)")
+        result.suggestions.append(
+            "Install NVIDIA drivers (https://developer.nvidia.com/cuda-downloads)"
+        )
         result.suggestions.append("Or install AMD ROCm (https://rocm.docs.amd.com)")
-    
+
     if not result.is_valid and raise_on_error:
         raise GPUInstallationError(result)
-    
+
     return result
 
 
 class GPUInstallationError(RuntimeError):
     """Exception raised when GPU installation validation fails"""
-    
+
     def __init__(self, validation_result: GPUValidationResult):
         """Initialize with validation result
-        
+
         Args:
             validation_result: GPUValidationResult from validation
         """
         self.validation_result = validation_result
         message = self._format_error_message(validation_result)
         super().__init__(message)
-    
+
     def _format_error_message(self, result: GPUValidationResult) -> str:
         """Generate a detailed error message from validation result"""
         if result.is_valid:
             return ""
-        
+
         lines = [f"{result.vendor.value} GPU installation validation FAILED:"]
         lines.append("")
-        
+
         if result.issues:
             lines.append("Critical Issues:")
             for issue in result.issues:
                 lines.append(f"  - {issue}")
             lines.append("")
-        
+
         if result.warnings:
             lines.append("Warnings:")
             for warning in result.warnings:
                 lines.append(f"  - {warning}")
             lines.append("")
-        
+
         if result.suggestions:
             lines.append("Suggested Actions:")
             for suggestion in result.suggestions:
                 lines.append(f"  • {suggestion}")
             lines.append("")
-        
+
         vendor_docs = {
             GPUVendor.AMD: "https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html",
             GPUVendor.NVIDIA: "https://developer.nvidia.com/cuda-downloads",
         }
-        
-        lines.append(f"Please ensure {result.vendor.value} GPU drivers and tools are properly installed.")
+
+        lines.append(
+            f"Please ensure {result.vendor.value} GPU drivers and tools are properly installed."
+        )
         if result.vendor in vendor_docs:
             lines.append(f"Installation guide: {vendor_docs[result.vendor]}")
-        
+
         return "\n".join(lines)
 
 
@@ -740,20 +765,25 @@ def validate_rocm_installation(
         GPUInstallationError: If validation fails and raise_on_error is True
     """
     return validate_gpu_installation(
-        vendor=GPUVendor.AMD, verbose=verbose, raise_on_error=raise_on_error, rocm_path=rocm_path
+        vendor=GPUVendor.AMD,
+        verbose=verbose,
+        raise_on_error=raise_on_error,
+        rocm_path=rocm_path,
     )
 
 
 if __name__ == "__main__":
     # Command-line usage
     import sys
-    
-    verbose = '--verbose' in sys.argv or '-v' in sys.argv
-    result = validate_gpu_installation(vendor=None, verbose=verbose, raise_on_error=False)
-    
+
+    verbose = "--verbose" in sys.argv or "-v" in sys.argv
+    result = validate_gpu_installation(
+        vendor=None, verbose=verbose, raise_on_error=False
+    )
+
     if result.is_valid:
         print(f"\n✓ {result.vendor.value} GPU installation validated successfully")
         if result.version:
             print(f"Version: {result.version}")
-    
+
     sys.exit(0 if result.is_valid else 1)
diff --git a/src/madengine/utils/log_formatting.py b/src/madengine/utils/log_formatting.py
index 14a0eed5..a56ed46a 100644
--- a/src/madengine/utils/log_formatting.py
+++ b/src/madengine/utils/log_formatting.py
@@ -9,8 +9,8 @@
 """
 
 import pandas as pd
-from rich.table import Table
 from rich.console import Console as RichConsole
+from rich.table import Table
 
 
 def format_dataframe_for_log(
diff --git a/src/madengine/utils/nvidia_tool_manager.py b/src/madengine/utils/nvidia_tool_manager.py
index 73259b38..1cc9d9dc 100644
--- a/src/madengine/utils/nvidia_tool_manager.py
+++ b/src/madengine/utils/nvidia_tool_manager.py
@@ -25,38 +25,38 @@
 
 class NvidiaToolManager(BaseGPUToolManager):
     """NVIDIA CUDA tool manager with basic functionality.
-    
+
     Provides simple wrappers around NVIDIA tools while maintaining
     compatibility with BaseGPUToolManager interface.
-    
+
     Current implementation:
     - nvidia-smi for GPU queries
     - nvcc for CUDA version
     - Basic error handling
-    
+
     No version-aware tool selection yet (deferred for future work).
     """
-    
+
     # Tool paths
     NVIDIA_SMI_PATH = "/usr/bin/nvidia-smi"
     NVCC_PATH = "/usr/local/cuda/bin/nvcc"
-    
+
     def __init__(self):
         """Initialize NVIDIA tool manager."""
         super().__init__()
         self._log_debug("Initialized NVIDIA tool manager")
-    
+
     def get_version(self) -> Optional[str]:
         """Get CUDA version as string.
-        
+
         Returns:
             CUDA version string or None if unable to detect
         """
         return self.get_cuda_version()
-    
+
     def get_cuda_version(self) -> Optional[str]:
         """Get CUDA version from nvcc.
-        
+
         Returns:
             CUDA version string (e.g., "12.0") or None if unable to detect
         """
@@ -64,40 +64,40 @@ def get_cuda_version(self) -> Optional[str]:
         cached = self._get_cached_result("cuda_version")
         if cached is not None:
             return cached
-        
+
         try:
             # Try nvcc --version
             if self.is_tool_available(self.NVCC_PATH):
                 command = f"{self.NVCC_PATH} --version | sed -n 's/^.*release \\([0-9]\\+\\.[0-9]\\+\\).*$/\\1/p'"
                 success, stdout, stderr = self._execute_shell_command(command)
-                
+
                 if success and stdout:
                     version = stdout.strip()
                     self._cache_result("cuda_version", version)
                     self._log_info(f"CUDA version: {version}")
                     return version
-            
+
             # Fallback: Try nvidia-smi to get driver version
             if self.is_tool_available(self.NVIDIA_SMI_PATH):
                 command = f"{self.NVIDIA_SMI_PATH} --query | grep 'CUDA Version' | awk '{{print $4}}'"
                 success, stdout, stderr = self._execute_shell_command(command)
-                
+
                 if success and stdout:
                     version = stdout.strip()
                     self._cache_result("cuda_version", version)
                     self._log_info(f"CUDA version (from nvidia-smi): {version}")
                     return version
-            
+
             self._log_warning("Unable to detect CUDA version")
             return None
-            
+
         except Exception as e:
             self._log_error(f"Error detecting CUDA version: {e}")
             return None
-    
+
     def get_driver_version(self) -> Optional[str]:
         """Get NVIDIA driver version.
-        
+
         Returns:
             Driver version string or None if unable to detect
         """
@@ -105,54 +105,55 @@ def get_driver_version(self) -> Optional[str]:
         cached = self._get_cached_result("driver_version")
         if cached is not None:
             return cached
-        
+
         try:
             if self.is_tool_available(self.NVIDIA_SMI_PATH):
                 command = f"{self.NVIDIA_SMI_PATH} --query-gpu=driver_version --format=csv,noheader | head -n1"
                 success, stdout, stderr = self._execute_shell_command(command)
-                
+
                 if success and stdout:
                     version = stdout.strip()
                     self._cache_result("driver_version", version)
                     self._log_info(f"NVIDIA driver version: {version}")
                     return version
-            
+
             self._log_warning("Unable to detect NVIDIA driver version")
             return None
-            
+
         except Exception as e:
             self._log_error(f"Error detecting driver version: {e}")
             return None
-    
+
     def execute_command(
-        self,
-        command: str,
-        fallback_command: Optional[str] = None,
-        timeout: int = 30
+        self, command: str, fallback_command: Optional[str] = None, timeout: int = 30
     ) -> str:
         """Execute command with optional fallback.
-        
+
         Args:
             command: Primary command to execute
             fallback_command: Optional fallback command (currently not used for NVIDIA)
             timeout: Command timeout in seconds
-            
+
         Returns:
             Command output as string
-            
+
         Raises:
             RuntimeError: If command fails
         """
         success, stdout, stderr = self._execute_shell_command(command, timeout)
-        
+
         if success:
             return stdout
-        
+
         # Try fallback if provided
         if fallback_command:
-            self._log_warning(f"Primary command failed, trying fallback: {fallback_command[:50]}...")
-            success, stdout, stderr = self._execute_shell_command(fallback_command, timeout)
-            
+            self._log_warning(
+                f"Primary command failed, trying fallback: {fallback_command[:50]}..."
+            )
+            success, stdout, stderr = self._execute_shell_command(
+                fallback_command, timeout
+            )
+
             if success:
                 return stdout
             else:
@@ -164,17 +165,17 @@ def execute_command(
                 )
         else:
             raise RuntimeError(f"Command failed: {command}\nError: {stderr}")
-    
+
     def execute_nvidia_smi(self, args: str, timeout: int = 30) -> str:
         """Execute nvidia-smi with specified arguments.
-        
+
         Args:
             args: Arguments to pass to nvidia-smi
             timeout: Command timeout in seconds
-            
+
         Returns:
             Command output as string
-            
+
         Raises:
             RuntimeError: If nvidia-smi is not available or command fails
         """
@@ -183,20 +184,20 @@ def execute_nvidia_smi(self, args: str, timeout: int = 30) -> str:
                 f"nvidia-smi not found at {self.NVIDIA_SMI_PATH}\n"
                 f"Ensure NVIDIA drivers are installed."
             )
-        
+
         command = f"{self.NVIDIA_SMI_PATH} {args}"
         return self.execute_command(command, timeout=timeout)
-    
+
     def execute_nvcc(self, args: str, timeout: int = 30) -> str:
         """Execute nvcc with specified arguments.
-        
+
         Args:
             args: Arguments to pass to nvcc
             timeout: Command timeout in seconds
-            
+
         Returns:
             Command output as string
-            
+
         Raises:
             RuntimeError: If nvcc is not available or command fails
         """
@@ -205,16 +206,16 @@ def execute_nvcc(self, args: str, timeout: int = 30) -> str:
                 f"nvcc not found at {self.NVCC_PATH}\n"
                 f"Ensure CUDA toolkit is installed."
             )
-        
+
         command = f"{self.NVCC_PATH} {args}"
         return self.execute_command(command, timeout=timeout)
-    
+
     def get_gpu_count(self) -> int:
         """Get number of NVIDIA GPUs in the system.
-        
+
         Returns:
             Number of GPUs detected
-            
+
         Raises:
             RuntimeError: If unable to detect GPUs
         """
@@ -222,16 +223,16 @@ def get_gpu_count(self) -> int:
         cached = self._get_cached_result("gpu_count")
         if cached is not None:
             return cached
-        
+
         try:
             output = self.execute_nvidia_smi("-L | wc -l")
             count = int(output.strip())
-            
+
             self._cache_result("gpu_count", count)
             self._log_info(f"Detected {count} NVIDIA GPU(s)")
-            
+
             return count
-            
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to determine number of NVIDIA GPUs.\n"
@@ -240,16 +241,16 @@ def get_gpu_count(self) -> int:
                 f"- Verify NVIDIA drivers: nvidia-smi\n"
                 f"- Check GPU accessibility: ls -la /dev/nvidia*"
             )
-    
+
     def get_gpu_product_name(self, gpu_id: int = 0) -> str:
         """Get GPU product name.
-        
+
         Args:
             gpu_id: GPU index (0-based)
-            
+
         Returns:
             GPU product name (e.g., "NVIDIA H100 80GB HBM3")
-            
+
         Raises:
             RuntimeError: If unable to get product name
         """
@@ -257,34 +258,34 @@ def get_gpu_product_name(self, gpu_id: int = 0) -> str:
         cached = self._get_cached_result(cache_key)
         if cached:
             return cached
-        
+
         try:
             output = self.execute_nvidia_smi(
                 f"--query-gpu=name --format=csv,noheader,nounits -i {gpu_id}"
             )
             product_name = output.strip()
-            
+
             self._cache_result(cache_key, product_name)
             self._log_debug(f"GPU {gpu_id} product name: {product_name}")
-            
+
             return product_name
-            
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to get GPU product name for GPU {gpu_id}.\n"
                 f"Error: {e}\n"
                 f"Ensure GPU {gpu_id} exists: nvidia-smi -L"
             )
-    
+
     def get_gpu_architecture(self, gpu_id: int = 0) -> str:
         """Get GPU architecture/compute capability.
-        
+
         Args:
             gpu_id: GPU index (0-based)
-            
+
         Returns:
             GPU architecture string
-            
+
         Raises:
             RuntimeError: If unable to detect GPU architecture
         """
@@ -292,22 +293,21 @@ def get_gpu_architecture(self, gpu_id: int = 0) -> str:
         cached = self._get_cached_result(cache_key)
         if cached:
             return cached
-        
+
         try:
             # Get full GPU name which includes architecture info
             output = self.execute_nvidia_smi(
                 f"-L | head -n{gpu_id + 1} | tail -n1 | sed 's/(UUID: .*)//g' | sed 's/GPU {gpu_id}: //g'"
             )
             arch = output.strip()
-            
+
             self._cache_result(cache_key, arch)
             self._log_debug(f"GPU {gpu_id} architecture: {arch}")
-            
+
             return arch
-            
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to determine GPU architecture for GPU {gpu_id}.\n"
                 f"Error: {e}"
             )
-
diff --git a/src/madengine/utils/ops.py b/src/madengine/utils/ops.py
index cd717fec..7c4a2890 100644
--- a/src/madengine/utils/ops.py
+++ b/src/madengine/utils/ops.py
@@ -9,9 +9,10 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+import sys
+
 # built-in modules
 import typing
-import sys
 
 
 # Class to both write and display stream, in "live" mode
diff --git a/src/madengine/utils/path_utils.py b/src/madengine/utils/path_utils.py
index 637efb8f..584408e5 100644
--- a/src/madengine/utils/path_utils.py
+++ b/src/madengine/utils/path_utils.py
@@ -30,4 +30,5 @@ def get_madengine_root() -> Path:
         Path to the madengine package root.
     """
     import madengine
+
     return Path(madengine.__file__).resolve().parent
diff --git a/src/madengine/utils/rocm_path_resolver.py b/src/madengine/utils/rocm_path_resolver.py
index 639e3aad..3e49e444 100644
--- a/src/madengine/utils/rocm_path_resolver.py
+++ b/src/madengine/utils/rocm_path_resolver.py
@@ -75,9 +75,7 @@ def __init__(
         environ: Optional[Mapping[str, str]] = None,
         which: Optional[WhichFn] = None,
     ) -> None:
-        self._environ: Mapping[str, str] = (
-            os.environ if environ is None else environ
-        )
+        self._environ: Mapping[str, str] = os.environ if environ is None else environ
         self._which: WhichFn = which if which is not None else shutil.which
 
     @staticmethod
@@ -98,11 +96,17 @@ def looks_like_rocm_root(self, root: Path) -> bool:
         if (root / "bin" / "rocminfo").is_file():
             return True
         # Versioned apt/tar layouts (e.g. /opt/rocm-7.13.0) and many TheRock images
-        if (root / "bin" / "amd-smi").is_file() and (root / "bin" / "rocm-smi").is_file():
+        if (root / "bin" / "amd-smi").is_file() and (
+            root / "bin" / "rocm-smi"
+        ).is_file():
             return True
-        if (root / "bin" / "rocm-smi").is_file() and (root / ".info" / "version").is_file():
+        if (root / "bin" / "rocm-smi").is_file() and (
+            root / ".info" / "version"
+        ).is_file():
             return True
-        if (root / "bin" / "amd-smi").is_file() and (root / ".info" / "version").is_file():
+        if (root / "bin" / "amd-smi").is_file() and (
+            root / ".info" / "version"
+        ).is_file():
             return True
         if (root / ".info" / "version").is_file():
             return True
@@ -120,7 +124,9 @@ def versioned_opt_rocm_dirs(self) -> List[Path]:
 
     def infer_from_path_tools(self) -> OptionalPathStr:
         """Use ``which`` on rocminfo, amd-smi, rocm-smi; return first plausible root."""
-        from madengine.utils import rocm_path_resolver as m  # same module; for patch hooks
+        from madengine.utils import (
+            rocm_path_resolver as m,
+        )  # same module; for patch hooks
 
         for name in ("rocminfo", "amd-smi", "rocm-smi"):
             w = self._which(name)  # type: ignore[operator]
@@ -133,7 +139,9 @@ def infer_from_path_tools(self) -> OptionalPathStr:
 
     def auto_detect(self) -> OptionalPathStr:
         """Heuristic search for a usable ROCm installation (see class doc + module doc)."""
-        from madengine.utils import rocm_path_resolver as m  # same module; for patch hooks
+        from madengine.utils import (
+            rocm_path_resolver as m,
+        )  # same module; for patch hooks
 
         opt = Path("/opt/rocm")
         if m._looks_like_rocm_root(opt):
@@ -404,9 +412,7 @@ def finalize_container_rocm_path(
     if oci:
         croot = normalize_rocm_path(oci)
         d["ROCM_PATH"] = croot
-        log(
-            f"ROCm container ROCM_PATH from image OCI config ({docker_image}): {croot}"
-        )
+        log(f"ROCm container ROCM_PATH from image OCI config ({docker_image}): {croot}")
         return croot
 
     log(
@@ -428,5 +434,3 @@ def finalize_container_rocm_path(
         f"{croot} (set docker_env_vars.ROCM_PATH if wrong)."
     )
     return croot
-
-
diff --git a/src/madengine/utils/rocm_tool_manager.py b/src/madengine/utils/rocm_tool_manager.py
index 60870d29..af6d9e63 100644
--- a/src/madengine/utils/rocm_tool_manager.py
+++ b/src/madengine/utils/rocm_tool_manager.py
@@ -25,20 +25,19 @@
 from madengine.core.constants import get_rocm_path
 from madengine.utils.gpu_tool_manager import BaseGPUToolManager
 
-
 # ROCm version threshold for amd-smi vs rocm-smi (from PR #54)
 ROCM_VERSION_THRESHOLD = (6, 4, 1)
 
 
 class ROCmToolManager(BaseGPUToolManager):
     """AMD ROCm tool manager with version-aware tool selection.
-    
+
     Features:
     - Automatic ROCm version detection from multiple sources
     - Version-aware tool selection (amd-smi >= 6.4.1, rocm-smi < 6.4.1)
     - Automatic fallback with warnings when preferred tool unavailable
     - Comprehensive error messages with troubleshooting suggestions
-    
+
     Tool Selection Logic:
     - ROCm >= 6.4.1: Prefer amd-smi, fallback to rocm-smi with warning
     - ROCm < 6.4.1: Use rocm-smi
@@ -59,10 +58,10 @@ def __init__(self, rocm_path: Optional[str] = None):
         self.ROCMINFO_PATH = os.path.join(self.rocm_path, "bin", "rocminfo")
         self.ROCM_VERSION_FILE = os.path.join(self.rocm_path, ".info", "version")
         self._log_debug("Initialized ROCm tool manager")
-    
+
     def get_version(self) -> Optional[str]:
         """Get ROCm version as string.
-        
+
         Returns:
             ROCm version string (e.g., "6.4.1") or None if unable to detect
         """
@@ -70,20 +69,20 @@ def get_version(self) -> Optional[str]:
         if version_tuple:
             return ".".join(map(str, version_tuple))
         return None
-    
+
     def get_rocm_version(self) -> Optional[Tuple[int, int, int]]:
         """Get ROCm version as tuple.
-        
+
         Tries multiple detection methods in order:
         1. hipconfig --version
         2. /opt/rocm/.info/version file
         3. rocminfo parsing
-        
+
         Results are cached for performance.
-        
+
         Returns:
             ROCm version as tuple (major, minor, patch) or None if unable to detect
-            
+
         Example:
             >>> manager = ROCmToolManager()
             >>> manager.get_rocm_version()
@@ -93,123 +92,134 @@ def get_rocm_version(self) -> Optional[Tuple[int, int, int]]:
         cached = self._get_cached_result("rocm_version")
         if cached is not None:
             return cached
-        
+
         version = None
-        
+
         # Method 1: Try hipconfig --version
         if self.is_tool_available(self.HIPCONFIG_PATH):
             success, stdout, stderr = self._execute_shell_command(
-                f"{self.HIPCONFIG_PATH} --version",
-                timeout=10
+                f"{self.HIPCONFIG_PATH} --version", timeout=10
             )
             if success and stdout:
                 # Parse version like "6.4.1-12345" -> (6, 4, 1)
                 try:
-                    version_str = stdout.split('-')[0].strip()
-                    parts = version_str.split('.')
+                    version_str = stdout.split("-")[0].strip()
+                    parts = version_str.split(".")
                     if len(parts) >= 3:
                         version = (int(parts[0]), int(parts[1]), int(parts[2]))
-                        self._log_debug(f"Detected ROCm version from hipconfig: {version}")
+                        self._log_debug(
+                            f"Detected ROCm version from hipconfig: {version}"
+                        )
                 except (ValueError, IndexError) as e:
-                    self._log_warning(f"Failed to parse hipconfig version '{stdout}': {e}")
-        
+                    self._log_warning(
+                        f"Failed to parse hipconfig version '{stdout}': {e}"
+                    )
+
         # Method 2: Try version file
         if version is None and os.path.exists(self.ROCM_VERSION_FILE):
             try:
-                with open(self.ROCM_VERSION_FILE, 'r') as f:
-                    version_str = f.read().strip().split('-')[0]
-                    parts = version_str.split('.')
+                with open(self.ROCM_VERSION_FILE, "r") as f:
+                    version_str = f.read().strip().split("-")[0]
+                    parts = version_str.split(".")
                     if len(parts) >= 3:
                         version = (int(parts[0]), int(parts[1]), int(parts[2]))
                         self._log_debug(f"Detected ROCm version from file: {version}")
             except (IOError, ValueError, IndexError) as e:
                 self._log_warning(f"Failed to read version file: {e}")
-        
+
         # Method 3: Try rocminfo (less reliable, last resort)
         if version is None and self.is_tool_available(self.ROCMINFO_PATH):
             success, stdout, stderr = self._execute_shell_command(
-                f"{self.ROCMINFO_PATH} | grep -i 'ROCm Version' | head -n1",
-                timeout=10
+                f"{self.ROCMINFO_PATH} | grep -i 'ROCm Version' | head -n1", timeout=10
             )
             if success and stdout:
                 try:
                     # Parse output like "ROCm Version: 6.4.1"
-                    match = re.search(r'(\d+)\.(\d+)\.(\d+)', stdout)
+                    match = re.search(r"(\d+)\.(\d+)\.(\d+)", stdout)
                     if match:
-                        version = (int(match.group(1)), int(match.group(2)), int(match.group(3)))
-                        self._log_debug(f"Detected ROCm version from rocminfo: {version}")
+                        version = (
+                            int(match.group(1)),
+                            int(match.group(2)),
+                            int(match.group(3)),
+                        )
+                        self._log_debug(
+                            f"Detected ROCm version from rocminfo: {version}"
+                        )
                 except (ValueError, AttributeError) as e:
                     self._log_warning(f"Failed to parse rocminfo output: {e}")
-        
+
         # Cache the result (even if None)
         self._cache_result("rocm_version", version)
-        
+
         if version:
             self._log_info(f"ROCm version: {'.'.join(map(str, version))}")
         else:
             self._log_warning("Unable to detect ROCm version from any source")
-        
+
         return version
-    
+
     def get_preferred_smi_tool(self) -> str:
         """Get the preferred SMI tool based on ROCm version.
-        
+
         Returns:
             Tool name: 'amd-smi' or 'rocm-smi'
-            
+
         Logic:
             - ROCm >= 6.4.1: Prefer amd-smi
             - ROCm < 6.4.1: Use rocm-smi
             - Unknown version: Try amd-smi first (conservative choice)
         """
         version = self.get_rocm_version()
-        
+
         if version is None:
             self._log_warning("ROCm version unknown, defaulting to amd-smi")
             return "amd-smi"
-        
+
         if version >= ROCM_VERSION_THRESHOLD:
             return "amd-smi"
         else:
             return "rocm-smi"
-    
+
     def execute_command(
-        self,
-        command: str,
-        fallback_command: Optional[str] = None,
-        timeout: int = 30
+        self, command: str, fallback_command: Optional[str] = None, timeout: int = 30
     ) -> str:
         """Execute command with optional fallback.
-        
+
         Args:
             command: Primary command to execute
             fallback_command: Optional fallback command if primary fails
             timeout: Command timeout in seconds
-            
+
         Returns:
             Command output as string
-            
+
         Raises:
             RuntimeError: If both primary and fallback commands fail
         """
         # Try primary command
         success, stdout, stderr = self._execute_shell_command(command, timeout)
-        
+
         if success:
             self._log_debug(f"Command succeeded: {command[:50]}...")
             return stdout
-        
+
         # Capture primary error before attempting fallback (fallback overwrites stderr)
         primary_stderr = stderr
-        self._log_warning(f"Primary command failed: {command[:50]}... Error: {primary_stderr}")
+        self._log_warning(
+            f"Primary command failed: {command[:50]}... Error: {primary_stderr}"
+        )
 
         # Try fallback if provided
         if fallback_command:
             self._log_info(f"Trying fallback command: {fallback_command[:50]}...")
-            success, stdout, stderr = self._execute_shell_command(fallback_command, timeout)
+            success, stdout, stderr = self._execute_shell_command(
+                fallback_command, timeout
+            )
 
             if success:
-                self._log_warning("Fallback command succeeded (primary tool may be missing or misconfigured)")
+                self._log_warning(
+                    "Fallback command succeeded (primary tool may be missing or misconfigured)"
+                )
                 return stdout
             else:
                 # Both failed
@@ -223,25 +233,27 @@ def execute_command(
         else:
             # No fallback, raise error
             raise RuntimeError(f"Command failed: {command}\nError: {stderr}")
-    
-    def execute_smi_command(self, command_template: str, use_amd_smi: bool = True, **kwargs) -> str:
+
+    def execute_smi_command(
+        self, command_template: str, use_amd_smi: bool = True, **kwargs
+    ) -> str:
         """Execute SMI command with automatic tool selection and fallback.
-        
+
         Args:
             command_template: Command template with {tool} placeholder
             use_amd_smi: If True, use amd-smi syntax; if False, use rocm-smi syntax
             **kwargs: Additional format parameters for command template
-            
+
         Returns:
             Command output as string
-            
+
         Example:
             >>> manager = ROCmToolManager()
             >>> # Will try amd-smi, fallback to rocm-smi if needed
             >>> output = manager.execute_smi_command("{tool} list --csv")
         """
         preferred_tool = self.get_preferred_smi_tool()
-        
+
         # Format command with preferred tool
         if preferred_tool == "amd-smi":
             tool_path = self.AMD_SMI_PATH
@@ -249,22 +261,22 @@ def execute_smi_command(self, command_template: str, use_amd_smi: bool = True, *
         else:
             tool_path = self.ROCM_SMI_PATH
             fallback_path = self.AMD_SMI_PATH
-        
+
         command = command_template.format(tool=tool_path, **kwargs)
-        
+
         # Create fallback command if fallback tool is available
         fallback_command = None
         if self.is_tool_available(fallback_path):
             fallback_command = command_template.format(tool=fallback_path, **kwargs)
-        
+
         return self.execute_command(command, fallback_command)
-    
+
     def get_gpu_count(self) -> int:
         """Get number of AMD GPUs in the system.
-        
+
         Returns:
             Number of GPUs detected
-            
+
         Raises:
             RuntimeError: If unable to detect GPUs with any tool
         """
@@ -272,9 +284,9 @@ def get_gpu_count(self) -> int:
         cached = self._get_cached_result("gpu_count")
         if cached is not None:
             return cached
-        
+
         preferred_tool = self.get_preferred_smi_tool()
-        
+
         try:
             if preferred_tool == "amd-smi":
                 # Try amd-smi list --csv
@@ -283,17 +295,21 @@ def get_gpu_count(self) -> int:
             else:
                 # Use rocm-smi
                 command = f"{self.ROCM_SMI_PATH} --showid --csv | tail -n +2 | wc -l"
-                fallback = f"{self.AMD_SMI_PATH} list --csv | tail -n +3 | wc -l" if self.is_tool_available(self.AMD_SMI_PATH) else None
-            
+                fallback = (
+                    f"{self.AMD_SMI_PATH} list --csv | tail -n +3 | wc -l"
+                    if self.is_tool_available(self.AMD_SMI_PATH)
+                    else None
+                )
+
             output = self.execute_command(command, fallback)
             count = int(output.strip())
-            
+
             # Cache result
             self._cache_result("gpu_count", count)
             self._log_info(f"Detected {count} AMD GPU(s)")
-            
+
             return count
-            
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to determine number of AMD GPUs.\n"
@@ -304,16 +320,16 @@ def get_gpu_count(self) -> int:
                 f"- Ensure user is in 'video' and 'render' groups\n"
                 f"- See: https://github.com/ROCm/TheRock"
             )
-    
+
     def get_gpu_product_name(self, gpu_id: int = 0) -> str:
         """Get GPU product name with fallback (from PR #54).
-        
+
         Args:
             gpu_id: GPU index (0-based)
-            
+
         Returns:
             GPU product name (e.g., "AMD Instinct MI300X")
-            
+
         Raises:
             RuntimeError: If unable to get product name with any tool
         """
@@ -321,9 +337,9 @@ def get_gpu_product_name(self, gpu_id: int = 0) -> str:
         cached = self._get_cached_result(cache_key)
         if cached:
             return cached
-        
+
         preferred_tool = self.get_preferred_smi_tool()
-        
+
         try:
             if preferred_tool == "amd-smi":
                 # Try amd-smi static command
@@ -334,17 +350,21 @@ def get_gpu_product_name(self, gpu_id: int = 0) -> str:
                 # Use rocm-smi
                 command = f"{self.ROCM_SMI_PATH} --showproductname | grep 'GPU\\[{gpu_id}\\]' | awk '{{print $NF}}'"
                 # Fallback to amd-smi if available
-                fallback = f"{self.AMD_SMI_PATH} static -g {gpu_id} | grep MARKET_NAME: | cut -d ':' -f 2" if self.is_tool_available(self.AMD_SMI_PATH) else None
-            
+                fallback = (
+                    f"{self.AMD_SMI_PATH} static -g {gpu_id} | grep MARKET_NAME: | cut -d ':' -f 2"
+                    if self.is_tool_available(self.AMD_SMI_PATH)
+                    else None
+                )
+
             output = self.execute_command(command, fallback)
             product_name = output.strip()
-            
+
             # Cache result
             self._cache_result(cache_key, product_name)
             self._log_debug(f"GPU {gpu_id} product name: {product_name}")
-            
+
             return product_name
-            
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to get GPU product name for GPU {gpu_id}.\n"
@@ -354,13 +374,13 @@ def get_gpu_product_name(self, gpu_id: int = 0) -> str:
                 f"- Check ROCm version: cat {self.ROCM_VERSION_FILE}\n"
                 f"- For ROCm >= 6.4.1, ensure amd-smi is installed"
             )
-    
+
     def get_gpu_architecture(self) -> str:
         """Get GPU architecture (e.g., gfx908, gfx90a, gfx942).
-        
+
         Returns:
             GPU architecture string
-            
+
         Raises:
             RuntimeError: If unable to detect GPU architecture
         """
@@ -368,12 +388,12 @@ def get_gpu_architecture(self) -> str:
         cached = self._get_cached_result("gpu_architecture")
         if cached:
             return cached
-        
+
         try:
             # Use rocminfo to get architecture (most reliable)
             command = f"{self.ROCMINFO_PATH} | grep -o -m 1 'gfx.*'"
             success, stdout, stderr = self._execute_shell_command(command)
-            
+
             if success and stdout:
                 arch = stdout.strip()
                 self._cache_result("gpu_architecture", arch)
@@ -381,7 +401,7 @@ def get_gpu_architecture(self) -> str:
                 return arch
             else:
                 raise RuntimeError(f"rocminfo failed or returned empty: {stderr}")
-                
+
         except Exception as e:
             raise RuntimeError(
                 f"Unable to determine GPU architecture.\n"
@@ -391,13 +411,13 @@ def get_gpu_architecture(self) -> str:
                 f"- Check GPU is visible: {self.ROCM_SMI_PATH} --showid\n"
                 f"- Ensure ROCm is properly installed"
             )
-    
+
     def get_gpu_vendor_check(self) -> str:
         """Check GPU vendor with fallback (from PR #54).
-        
+
         Returns:
             "AMD" if AMD GPU detected, error message otherwise
-            
+
         Note:
             This checks if AMD SMI tools can detect GPUs, confirming AMD vendor.
         """
@@ -410,46 +430,47 @@ def get_gpu_vendor_check(self) -> str:
                 return "No AMD GPUs detected"
         except Exception as e:
             return f"Unable to detect AMD GPU vendor: {e}"
-    
+
     def list_gpus_json(self) -> List[Dict]:
         """List all GPUs with detailed information in JSON format.
-        
+
         Returns:
             List of GPU information dictionaries
-            
+
         Raises:
             RuntimeError: If unable to list GPUs
         """
         preferred_tool = self.get_preferred_smi_tool()
-        
+
         try:
-            if preferred_tool == "amd-smi" and self.is_tool_available(self.AMD_SMI_PATH):
+            if preferred_tool == "amd-smi" and self.is_tool_available(
+                self.AMD_SMI_PATH
+            ):
                 # Try amd-smi list with JSON output
                 command = f"{self.AMD_SMI_PATH} list --json"
                 success, stdout, stderr = self._execute_shell_command(command)
-                
+
                 if success and stdout:
                     try:
                         return json.loads(stdout)
                     except json.JSONDecodeError as e:
                         self._log_warning(f"Failed to parse amd-smi JSON: {e}")
-            
+
             # Fallback: parse rocm-smi output
             command = f"{self.ROCM_SMI_PATH} --showid"
             output = self.execute_command(command)
-            
+
             # Parse rocm-smi output to JSON-like structure
             gpus = []
-            for line in output.split('\n'):
-                if 'GPU[' in line:
+            for line in output.split("\n"):
+                if "GPU[" in line:
                     try:
-                        gpu_id = int(line.split('[')[1].split(']')[0])
+                        gpu_id = int(line.split("[")[1].split("]")[0])
                         gpus.append({"gpu": gpu_id, "node_id": gpu_id})
                     except (IndexError, ValueError):
                         continue
-            
+
             return gpus
-            
+
         except Exception as e:
             raise RuntimeError(f"Unable to list GPUs: {e}")
-
diff --git a/src/madengine/utils/session_tracker.py b/src/madengine/utils/session_tracker.py
index 4449e496..8eeed31a 100644
--- a/src/madengine/utils/session_tracker.py
+++ b/src/madengine/utils/session_tracker.py
@@ -14,18 +14,18 @@
 class SessionTracker:
     """
     Tracks execution session boundaries for filtering performance results.
-    
+
     When an execution starts, it records the current row count in perf.csv.
     After execution, results can be filtered to show only rows added during this session.
-    
+
     Best Practice: Session marker file is stored in the SAME directory as perf.csv
     to ensure consistent access regardless of working directory changes.
     """
-    
+
     def __init__(self, perf_csv_path: str = "perf.csv"):
         """
         Initialize session tracker.
-        
+
         Args:
             perf_csv_path: Path to the performance CSV file
         """
@@ -33,22 +33,22 @@ def __init__(self, perf_csv_path: str = "perf.csv"):
         self.session_start_row: Optional[int] = None
         # Marker file in same directory as perf.csv
         self.marker_file = self.perf_csv_path.parent / ".madengine_session_start"
-    
+
     def start_session(self) -> int:
         """
         Mark the start of an execution session.
-        
+
         Records the current number of rows in perf.csv so we can later
         identify which rows were added during this session.
-        
+
         Also saves the marker file for use by child processes.
-        
+
         Returns:
             The starting row number (number of rows in CSV before this session)
         """
         if self.perf_csv_path.exists():
             # Count existing data rows (excluding header and blank lines)
-            with open(self.perf_csv_path, 'r') as f:
+            with open(self.perf_csv_path, "r") as f:
                 lines = f.readlines()
                 non_empty = [l for l in lines if l.strip()]
                 # Subtract 1 for header row
@@ -56,44 +56,44 @@ def start_session(self) -> int:
         else:
             # No existing file, start at 0
             self.session_start_row = 0
-        
+
         # Automatically save marker for child processes
         self._save_marker(self.session_start_row)
-        
+
         return self.session_start_row
-    
+
     def get_session_row_count(self) -> int:
         """
         Get the number of rows added during this session.
-        
+
         Returns:
             Number of rows added since session start
         """
         if self.session_start_row is None:
             return 0
-        
+
         if not self.perf_csv_path.exists():
             return 0
-        
-        with open(self.perf_csv_path, 'r') as f:
+
+        with open(self.perf_csv_path, "r") as f:
             lines = f.readlines()
             non_empty = [l for l in lines if l.strip()]
             current_row_count = max(0, len(non_empty) - 1)  # Exclude header
-        
+
         return current_row_count - self.session_start_row
-    
+
     def _save_marker(self, start_row: int):
         """
         Save session start marker to file (private method).
-        
+
         Args:
             start_row: The starting row number
         """
         # Ensure parent directory exists
         self.marker_file.parent.mkdir(parents=True, exist_ok=True)
-        with open(self.marker_file, 'w') as f:
+        with open(self.marker_file, "w") as f:
             f.write(str(start_row))
-    
+
     def cleanup_marker(self):
         """
         Remove session marker file for this instance.
@@ -103,28 +103,27 @@ def cleanup_marker(self):
                 os.remove(self.marker_file)
             except OSError:
                 pass
-    
+
     @staticmethod
     def load_session_marker_for_csv(perf_csv_path: str = "perf.csv") -> Optional[int]:
         """
         Static helper to load session marker for a given CSV path.
-        
+
         This is useful when you don't have a SessionTracker instance but need to load the marker.
-        
+
         Args:
             perf_csv_path: Path to the performance CSV file
-            
+
         Returns:
             Session start row, or None if marker doesn't exist
         """
         perf_path = Path(perf_csv_path).resolve()
         marker_file = perf_path.parent / ".madengine_session_start"
-        
+
         if marker_file.exists():
             try:
-                with open(marker_file, 'r') as f:
+                with open(marker_file, "r") as f:
                     return int(f.read().strip())
             except (ValueError, IOError):
                 return None
         return None
-
diff --git a/tests/conftest.py b/tests/conftest.py
index 91241e01..f7c0d937 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -17,13 +17,14 @@
 if _SRC.is_dir() and str(_SRC) not in sys.path:
     sys.path.insert(0, str(_SRC))
 from unittest.mock import MagicMock, patch
-import pytest
 
+import pytest
 
 # ============================================================================
 # Platform Configuration Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def amd_gpu_context():
     """Mock Context for AMD GPU platform (ROCm)."""
@@ -95,6 +96,7 @@ def multi_platform_context(request, amd_gpu_context, nvidia_gpu_context, cpu_con
 # Mock Args Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def mock_build_args():
     """Mock args for build command."""
@@ -149,6 +151,7 @@ def mock_run_args():
 # Test Data Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def sample_models():
     """Sample model data for testing."""
@@ -273,17 +276,16 @@ def sample_manifest():
 # Temporary File Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def temp_manifest_file(sample_manifest):
     """Create a temporary manifest file."""
-    with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".json", delete=False
-    ) as f:
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
         json.dump(sample_manifest, f)
         manifest_path = f.name
-    
+
     yield manifest_path
-    
+
     # Cleanup
     if os.path.exists(manifest_path):
         os.unlink(manifest_path)
@@ -295,9 +297,9 @@ def temp_working_dir():
     with tempfile.TemporaryDirectory() as tmpdir:
         original_cwd = os.getcwd()
         os.chdir(tmpdir)
-        
+
         yield tmpdir
-        
+
         os.chdir(original_cwd)
 
 
@@ -305,6 +307,7 @@ def temp_working_dir():
 # Mock Builder and Runner Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def mock_docker_builder(sample_build_summary_success):
     """Mock DockerBuilder with successful builds."""
@@ -349,6 +352,7 @@ def mock_container_runner():
 # Integration Test Helpers
 # ============================================================================
 
+
 @pytest.fixture
 def integration_test_env():
     """Setup integration test environment variables."""
@@ -356,7 +360,7 @@ def integration_test_env():
         "MODEL_DIR": "tests/fixtures/dummy",
         "MAD_SKIP_GPU_CHECK": "1",  # Skip actual GPU detection in tests
     }
-    
+
     with patch.dict(os.environ, env_vars, clear=False):
         yield env_vars
 
@@ -365,26 +369,17 @@ def integration_test_env():
 # Pytest Configuration
 # ============================================================================
 
+
 def pytest_configure(config):
     """Configure pytest with custom markers."""
     config.addinivalue_line(
         "markers", "integration: marks tests as integration tests (may be slow)"
     )
-    config.addinivalue_line(
-        "markers", "unit: marks tests as fast unit tests"
-    )
-    config.addinivalue_line(
-        "markers", "gpu: marks tests that require GPU hardware"
-    )
-    config.addinivalue_line(
-        "markers", "amd: marks tests specific to AMD GPUs"
-    )
-    config.addinivalue_line(
-        "markers", "nvidia: marks tests specific to NVIDIA GPUs"
-    )
-    config.addinivalue_line(
-        "markers", "cpu: marks tests for CPU-only execution"
-    )
+    config.addinivalue_line("markers", "unit: marks tests as fast unit tests")
+    config.addinivalue_line("markers", "gpu: marks tests that require GPU hardware")
+    config.addinivalue_line("markers", "amd: marks tests specific to AMD GPUs")
+    config.addinivalue_line("markers", "nvidia: marks tests specific to NVIDIA GPUs")
+    config.addinivalue_line("markers", "cpu: marks tests for CPU-only execution")
     config.addinivalue_line(
         "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
     )
@@ -394,40 +389,42 @@ def pytest_configure(config):
 # Utility Functions for Tests
 # ============================================================================
 
+
 def assert_build_manifest_valid(manifest_path):
     """Assert that a build manifest file is valid."""
     assert os.path.exists(manifest_path), f"Manifest not found: {manifest_path}"
-    
+
     with open(manifest_path) as f:
         manifest = json.load(f)
-    
+
     # Check required keys
     assert "built_images" in manifest
     assert "built_models" in manifest
     assert "summary" in manifest
-    
+
     # Check summary structure
     summary = manifest["summary"]
     assert "successful_builds" in summary
     assert "failed_builds" in summary
     assert isinstance(summary["successful_builds"], list)
     assert isinstance(summary["failed_builds"], list)
-    
+
     return manifest
 
 
 def assert_perf_csv_valid(csv_path):
     """Assert that a performance CSV file is valid."""
     assert os.path.exists(csv_path), f"Performance CSV not found: {csv_path}"
-    
+
     import pandas as pd
+
     df = pd.read_csv(csv_path)
-    
+
     # Check required columns
     required_columns = ["model", "n_gpus", "gpu_architecture", "status"]
     for col in required_columns:
         assert col in df.columns, f"Missing column: {col}"
-    
+
     return df
 
 
@@ -436,4 +433,3 @@ def assert_perf_csv_valid(csv_path):
     "assert_build_manifest_valid",
     "assert_perf_csv_valid",
 ]
-
diff --git a/tests/e2e/test_build_workflows.py b/tests/e2e/test_build_workflows.py
index 9cc74438..179762f7 100644
--- a/tests/e2e/test_build_workflows.py
+++ b/tests/e2e/test_build_workflows.py
@@ -10,25 +10,30 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
-# built-in modules
-import os
 import csv
 import json
-import pandas as pd
 
-# 3rd party modules
-import pytest
+# built-in modules
+import os
 
 # project modules
 import shutil
 
-from tests.fixtures.utils import BASE_DIR, MODEL_DIR
-from tests.fixtures.utils import global_data
-from tests.fixtures.utils import clean_test_temp_files
-from tests.fixtures.utils import DEFAULT_CLEAN_FILES
-from tests.fixtures.utils import generate_additional_context_for_machine
-from tests.fixtures.utils import get_gpu_arch
-from tests.fixtures.utils import requires_gpu
+import pandas as pd
+
+# 3rd party modules
+import pytest
+
+from tests.fixtures.utils import (
+    BASE_DIR,
+    DEFAULT_CLEAN_FILES,
+    MODEL_DIR,
+    clean_test_temp_files,
+    generate_additional_context_for_machine,
+    get_gpu_arch,
+    global_data,
+    requires_gpu,
+)
 
 
 @pytest.fixture
@@ -58,11 +63,11 @@ def dynamic_skip_gpu_arch_model_dir(tmp_path):
     return str(temp_model_dir)
 
 
-
 # ============================================================================
 # Build CLI Features Tests
 # ============================================================================
 
+
 class TestCLIFeatures:
     """Test various CLI features and command-line argument behaviors."""
 
@@ -99,7 +104,9 @@ def test_output_commandline_argument_writes_csv_correctly(
         if not success:
             pytest.fail("model, dummy, not found in perf_test.csv.")
 
-    @requires_gpu("skip_gpu_arch filtering requires GPU hardware to detect current architecture")
+    @requires_gpu(
+        "skip_gpu_arch filtering requires GPU hardware to detect current architecture"
+    )
     @pytest.mark.parametrize(
         "clean_test_temp_files", [["perf_test.csv", "perf_test.html"]], indirect=True
     )
@@ -124,7 +131,9 @@ def test_commandline_argument_skip_gpu_arch(
         if "Skipping model" not in output:
             pytest.fail("Enable skipping gpu arch for running model is failed.")
 
-    @requires_gpu("skip_gpu_arch filtering requires GPU hardware to detect current architecture")
+    @requires_gpu(
+        "skip_gpu_arch filtering requires GPU hardware to detect current architecture"
+    )
     @pytest.mark.parametrize(
         "clean_test_temp_files", [["perf_test.csv", "perf_test.html"]], indirect=True
     )
@@ -158,41 +167,46 @@ def test_output_multi_results(self, global_data, clean_test_temp_files):
         UPDATED: Now uses python3 -m madengine.cli.app instead of legacy mad.py
         """
         context = generate_additional_context_for_machine()
-        output = global_data['console'].sh(
-            "cd " + BASE_DIR + "; " + 
-            "MODEL_DIR=" + MODEL_DIR + " " + 
-            f"python3 -m madengine.cli.app run --tags dummy_multi --live-output --additional-context '{json.dumps(context)}'"
+        output = global_data["console"].sh(
+            "cd "
+            + BASE_DIR
+            + "; "
+            + "MODEL_DIR="
+            + MODEL_DIR
+            + " "
+            + f"python3 -m madengine.cli.app run --tags dummy_multi --live-output --additional-context '{json.dumps(context)}'"
         )
         # Check if multiple results are written to perf_dummy.csv
         success = False
         # Read the csv file to a dataframe using pandas
-        multi_df = pd.read_csv(os.path.join(BASE_DIR, 'perf_dummy.csv'))
+        multi_df = pd.read_csv(os.path.join(BASE_DIR, "perf_dummy.csv"))
         # Check the number of rows in the dataframe is 4, and columns is 4
         if multi_df.shape == (4, 4):
             success = True
         if not success:
             pytest.fail("The generated multi results is not correct.")
         # Check if multiple results from perf_dummy.csv get copied over to perf.csv
-        perf_df = pd.read_csv(os.path.join(BASE_DIR, 'perf.csv'))
+        perf_df = pd.read_csv(os.path.join(BASE_DIR, "perf.csv"))
         # Get the corresponding rows and columns from perf.csv
         perf_df = perf_df[multi_df.columns]
         perf_df = perf_df.iloc[-4:, :]
         # Drop model columns from both dataframes; these will not match
         # if multiple results csv has {model}, then perf csv has {tag_name}_{model}
-        multi_df = multi_df.drop('model', axis=1)
-        perf_df = perf_df.drop('model', axis=1)
+        multi_df = multi_df.drop("model", axis=1)
+        perf_df = perf_df.drop("model", axis=1)
         if all(perf_df.columns == multi_df.columns):
             success = True
         if not success:
-            pytest.fail("The columns of the generated multi results do not match perf.csv.")
-
-
+            pytest.fail(
+                "The columns of the generated multi results do not match perf.csv."
+            )
 
 
 # ============================================================================
 # Model Discovery Tests
 # ============================================================================
 
+
 class TestDiscover:
     """Test the model discovery feature."""
 
@@ -316,5 +330,3 @@ def test_multiple(self, global_data, clean_test_temp_files):
                         success = True
         if not success:
             pytest.fail("multiple tags did not run successfully.")
-
-
diff --git a/tests/e2e/test_data_workflows.py b/tests/e2e/test_data_workflows.py
index b83232d5..e1f3f7e2 100644
--- a/tests/e2e/test_data_workflows.py
+++ b/tests/e2e/test_data_workflows.py
@@ -3,23 +3,28 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
+import csv
+import json
+
 # built-in modules
 import os
-import csv
 import re
-import json
 import tempfile
 
 # third-party modules
 import pytest
 
-# project modules
-from tests.fixtures.utils import BASE_DIR, MODEL_DIR
-from tests.fixtures.utils import global_data
-from tests.fixtures.utils import clean_test_temp_files
-from tests.fixtures.utils import DEFAULT_CLEAN_FILES
 from madengine.core.dataprovider import Data
 
+# project modules
+from tests.fixtures.utils import (
+    BASE_DIR,
+    DEFAULT_CLEAN_FILES,
+    MODEL_DIR,
+    clean_test_temp_files,
+    global_data,
+)
+
 
 class TestDataProviders:
 
diff --git a/tests/e2e/test_execution_features.py b/tests/e2e/test_execution_features.py
index 4d7fd601..1399637d 100644
--- a/tests/e2e/test_execution_features.py
+++ b/tests/e2e/test_execution_features.py
@@ -22,11 +22,11 @@
 )
 
 
-
 # ============================================================================
 # Timeout Feature Tests
 # ============================================================================
 
+
 class TestCustomTimeoutsFunctionality:
 
     @pytest.mark.parametrize(
@@ -42,7 +42,13 @@ class TestCustomTimeoutsFunctionality:
         ],
     )
     def test_timeout_value_in_log(
-        self, global_data, clean_test_temp_files, tags, log_base_name, expected_seconds, extra_args
+        self,
+        global_data,
+        clean_test_temp_files,
+        tags,
+        log_base_name,
+        expected_seconds,
+        extra_args,
     ):
         """
         Timeout is set as expected (default 2h, model override, CLI override).
@@ -113,11 +119,11 @@ def test_timeout_in_model_timesout_correctly(
         assert test_duration == pytest.approx(120, 20)
 
 
-
 # ============================================================================
 # Debugging Feature Tests
 # ============================================================================
 
+
 class TestDebuggingFunctionality:
     """"""
 
@@ -284,10 +290,12 @@ def test_no_keepModelDir_does_not_keep_model_dir(
                 "model directory left over after not specifying keep-model-dir (or keep-alive) argument."
             )
 
+
 # ============================================================================
 # Live Output Feature Tests
 # ============================================================================
 
+
 class TestLiveOutputFunctionality:
     """Test the live output functionality."""
 
@@ -344,5 +352,3 @@ def test_liveOutput_prints_output_to_screen(
 
         if "ARG BASE_DOCKER=" not in output:
             pytest.fail("default run is silent")
-
-
diff --git a/tests/e2e/test_profiling_workflows.py b/tests/e2e/test_profiling_workflows.py
index 74925ae2..3f06af91 100644
--- a/tests/e2e/test_profiling_workflows.py
+++ b/tests/e2e/test_profiling_workflows.py
@@ -15,13 +15,13 @@
 # project modules
 from tests.fixtures.utils import (
     BASE_DIR,
-    MODEL_DIR,
     DEFAULT_CLEAN_FILES,
-    global_data,
+    MODEL_DIR,
     clean_test_temp_files,
-    requires_gpu,
-    is_nvidia,
     generate_additional_context_for_machine,
+    global_data,
+    is_nvidia,
+    requires_gpu,
 )
 
 
@@ -47,14 +47,14 @@ def test_rocprof_profiling_tool_runs_correctly(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rocprof\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rocprof"}]}\' ',
             canFail=True,
         )
 
         # Check for both legacy rocprof (results.csv) and rocprofv3 (.db files) output
         rocprof_output_dir = os.path.join(BASE_DIR, "rocprof_output")
         legacy_output = os.path.join(rocprof_output_dir, "results.csv")
-        
+
         # Check for rocprofv3 .db files in subdirectories
         rocprofv3_output_found = False
         if os.path.exists(rocprof_output_dir):
@@ -65,7 +65,7 @@ def test_rocprof_profiling_tool_runs_correctly(
                         break
                 if rocprofv3_output_found:
                     break
-        
+
         if not os.path.exists(legacy_output) and not rocprofv3_output_found:
             pytest.fail(
                 "Neither rocprof_output/results.csv (legacy) nor *_results.db (rocprofv3) generated with rocprof profiling run."
@@ -92,7 +92,7 @@ def test_rocm_trace_lite_profiling_tool_runs_correctly(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rocm_trace_lite\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rocm_trace_lite"}]}\' ',
             canFail=True,
         )
 
@@ -122,7 +122,7 @@ def test_rpd_profiling_tool_runs_correctly(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rpd\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rpd"}]}\' ',
             canFail=True,
         )
 
@@ -148,7 +148,7 @@ def test_gpu_info_power_profiling_tool_runs_correctly(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"gpu_info_power_profiler\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "gpu_info_power_profiler"}]}\' ',
             canFail=False,
         )
 
@@ -178,7 +178,7 @@ def test_gpu_info_vram_profiling_tool_runs_correctly(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"gpu_info_vram_profiler\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "gpu_info_vram_profiler"}]}\' ',
             canFail=False,
         )
 
@@ -206,7 +206,7 @@ def test_rocblas_trace_runs_correctly(self, global_data, clean_test_temp_files):
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rocblas_trace\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rocblas_trace"}]}\' ',
             canFail=False,
         )
 
@@ -242,7 +242,7 @@ def test_tensile_trace_runs_correctly(self, global_data, clean_test_temp_files):
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"tensile_trace\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "tensile_trace"}]}\' ',
             canFail=True,  # Allow failure due to missing performance metrics (trace tools suppress performance output)
         )
 
@@ -278,7 +278,7 @@ def test_miopen_trace_runs_correctly(self, global_data, clean_test_temp_files):
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"miopen_trace\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "miopen_trace"}]}\' ',
             canFail=False,
         )
 
@@ -312,7 +312,7 @@ def test_rccl_trace_runs_correctly(self, global_data, clean_test_temp_files):
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof_rccl --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rccl_trace\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof_rccl --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rccl_trace"}]}\' ',
             canFail=False,
         )
 
@@ -353,7 +353,7 @@ def test_toolA_runs_correctly(self, global_data, clean_test_temp_files):
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"test_tools_A\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "test_tools_A"}]}\' ',
             canFail=False,
         )
 
@@ -399,7 +399,7 @@ def test_stackable_design_runs_correctly(self, global_data, clean_test_temp_file
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"test_tools_A\"}, {\"name\": \"test_tools_B\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "test_tools_A"}, {"name": "test_tools_B"}]}\' ',
             canFail=False,
         )
 
@@ -462,13 +462,13 @@ def test_can_change_default_behavior_of_profiling_tool_with_additionalContext(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            + "python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context '{\"gpu_vendor\": \"AMD\", \"guest_os\": \"UBUNTU\", \"tools\": [{\"name\": \"rocprof\", \"cmd\": \"bash ../scripts/common/tools/rocprof_wrapper.sh --sys-trace --\"}]}' ",
+            + 'python3 -m madengine.cli.app run --live-output --tags dummy_prof --additional-context \'{"gpu_vendor": "AMD", "guest_os": "UBUNTU", "tools": [{"name": "rocprof", "cmd": "bash ../scripts/common/tools/rocprof_wrapper.sh --sys-trace --"}]}\' ',
             canFail=True,
         )
 
         # Check for profiling output (either legacy or rocprofv3 format)
         rocprof_output_dir = os.path.join(BASE_DIR, "rocprof_output")
-        
+
         # For rocprofv3 with --sys-trace, check for .db files
         rocprofv3_output_found = False
         if os.path.exists(rocprof_output_dir):
@@ -479,10 +479,12 @@ def test_can_change_default_behavior_of_profiling_tool_with_additionalContext(
                         break
                 if rocprofv3_output_found:
                     break
-        
+
         # Legacy check for results files
-        legacy_output = os.path.exists(os.path.join(BASE_DIR, "rocprof_output", "results.csv"))
-        
+        legacy_output = os.path.exists(
+            os.path.join(BASE_DIR, "rocprof_output", "results.csv")
+        )
+
         if not legacy_output and not rocprofv3_output_found:
             pytest.fail(
                 "No profiling output generated with custom rocprof command override."
diff --git a/tests/e2e/test_run_workflows.py b/tests/e2e/test_run_workflows.py
index 32f6141b..a5e09112 100644
--- a/tests/e2e/test_run_workflows.py
+++ b/tests/e2e/test_run_workflows.py
@@ -3,36 +3,38 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
+import csv
+import json
+
 # built-in modules
 import os
-import csv
 
 # third-party modules
 import pytest
-import json
+
+from madengine.core.context import Context
 
 # project modules
-from tests.fixtures.utils import BASE_DIR, MODEL_DIR
-from tests.fixtures.utils import global_data
-from tests.fixtures.utils import clean_test_temp_files
-from tests.fixtures.utils import get_gpu_nodeid_map
-from tests.fixtures.utils import get_num_gpus
-from tests.fixtures.utils import get_num_cpus
-from tests.fixtures.utils import requires_gpu
-from tests.fixtures.utils import generate_additional_context_for_machine
 from tests.fixtures.utils import (
+    BASE_DIR,
     DEFAULT_CLEAN_FILES,
-    build_run_command,
+    MODEL_DIR,
     assert_model_in_perf_csv,
+    build_run_command,
+    clean_test_temp_files,
+    generate_additional_context_for_machine,
+    get_gpu_nodeid_map,
+    get_num_cpus,
+    get_num_gpus,
+    global_data,
+    requires_gpu,
 )
 
-from madengine.core.context import Context
-
-
 # ============================================================================
 # Context Handling Tests
 # ============================================================================
 
+
 class TestContexts:
 
     @pytest.mark.parametrize(
@@ -331,9 +333,7 @@ def test_docker_mounts_mount_host_paths_in_docker_container(
             )
 
     @requires_gpu("docker gpus requires GPU hardware")
-    @pytest.mark.skipif(
-        get_num_gpus() < 8, reason="test requires atleast 8 gpus"
-    )
+    @pytest.mark.skipif(get_num_gpus() < 8, reason="test requires atleast 8 gpus")
     @pytest.mark.parametrize(
         "clean_test_temp_files",
         [["perf.csv", "perf.html", "results_dummy_gpubind.csv"]],
@@ -364,24 +364,24 @@ def test_docker_gpus(self, global_data, clean_test_temp_files):
                         gpu_node_ids.append(row["performance"])
                     else:
                         pytest.fail("model in perf_test.csv did not run successfully.")
-        
+
         # Debug information
         print(f"GPU node IDs from performance: {gpu_node_ids}")
         print(f"GPU nodeid map: {gpu_nodeid_map}")
         mapped_gpus = [gpu_nodeid_map.get(node_id) for node_id in gpu_node_ids]
         print(f"Mapped GPUs: {mapped_gpus}")
-        
+
         # Filter out None values and sort
         valid_mapped_gpus = [gpu for gpu in mapped_gpus if gpu is not None]
         sorted_gpus = sorted(valid_mapped_gpus)
         print(f"Sorted valid GPUs: {sorted_gpus}")
-        
+
         if sorted_gpus != [0, 2, 3, 4, 5, 7]:
-            pytest.fail(f"docker_gpus did not bind expected gpus in docker container. Expected: [0, 2, 3, 4, 5, 7], Got: {sorted_gpus}, Raw node IDs: {gpu_node_ids}, Mapping: {gpu_nodeid_map}")
+            pytest.fail(
+                f"docker_gpus did not bind expected gpus in docker container. Expected: [0, 2, 3, 4, 5, 7], Got: {sorted_gpus}, Raw node IDs: {gpu_node_ids}, Mapping: {gpu_nodeid_map}"
+            )
 
-    @pytest.mark.skipif(
-        get_num_cpus() < 64, reason="test requires atleast 64 cpus"
-    )
+    @pytest.mark.skipif(get_num_cpus() < 64, reason="test requires atleast 64 cpus")
     @pytest.mark.parametrize(
         "clean_test_temp_files",
         [["perf.csv", "perf.html", "results_dummy_cpubind.csv"]],
@@ -425,25 +425,25 @@ def test_gpu_product_name_matches_arch(self):
         """
 
         context = Context()
-        product_name = context.ctx['docker_env_vars']["MAD_SYSTEM_GPU_PRODUCT_NAME"]
+        product_name = context.ctx["docker_env_vars"]["MAD_SYSTEM_GPU_PRODUCT_NAME"]
 
-        #fail the test if GPU product name is empty
+        # fail the test if GPU product name is empty
         if not product_name or not product_name.strip():
             pytest.fail("GPU product name is empty or just whitespaces")
 
         product_name = product_name.upper()
 
-        #if product name has AMD or NVIDIA in it then it's a safe bet
-        #that it was parsed properly
+        # if product name has AMD or NVIDIA in it then it's a safe bet
+        # that it was parsed properly
         if not ("AMD" in product_name or "NVIDIA" in product_name):
             pytest.fail(f"Incorrect product name={product_name!r}")
 
 
-
 # ============================================================================
 # Tag Filtering Tests
 # ============================================================================
 
+
 class TestTagsFunctionality:
 
     @pytest.mark.parametrize(
@@ -463,7 +463,7 @@ def test_can_select_model_subset_with_commandline_tag_argument(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            +             f"python3 -m madengine.cli.app run --tags dummy_group_1 --live-output --additional-context '{json.dumps(context)}'"
+            + f"python3 -m madengine.cli.app run --tags dummy_group_1 --live-output --additional-context '{json.dumps(context)}'"
         )
 
         # Check for model execution (handles ANSI codes in output)
@@ -520,11 +520,9 @@ def test_model_names_are_automatically_tags(
             + "MODEL_DIR="
             + MODEL_DIR
             + " "
-            +             f"python3 -m madengine.cli.app run --tags dummy --live-output --additional-context '{json.dumps(context)}'"
+            + f"python3 -m madengine.cli.app run --tags dummy --live-output --additional-context '{json.dumps(context)}'"
         )
 
         # Check for model execution (handles ANSI codes in output)
         if "dummy" not in output or "ci-dummy_dummy" not in output:
             pytest.fail("dummy tag not selected with commandline --tags argument")
-
-
diff --git a/tests/e2e/test_scripting_workflows.py b/tests/e2e/test_scripting_workflows.py
index 3c163f1a..f1c45e3d 100644
--- a/tests/e2e/test_scripting_workflows.py
+++ b/tests/e2e/test_scripting_workflows.py
@@ -11,11 +11,14 @@
 import pytest
 
 # project modules
-from tests.fixtures.utils import BASE_DIR, MODEL_DIR
-from tests.fixtures.utils import global_data
-from tests.fixtures.utils import clean_test_temp_files
-from tests.fixtures.utils import DEFAULT_CLEAN_FILES
-from tests.fixtures.utils import is_nvidia
+from tests.fixtures.utils import (
+    BASE_DIR,
+    DEFAULT_CLEAN_FILES,
+    MODEL_DIR,
+    clean_test_temp_files,
+    global_data,
+    is_nvidia,
+)
 
 
 class TestPrePostScriptsFunctionality:
diff --git a/tests/fixtures/configs/test_slurm_job.yaml b/tests/fixtures/configs/test_slurm_job.yaml
new file mode 100644
index 00000000..97342a7f
--- /dev/null
+++ b/tests/fixtures/configs/test_slurm_job.yaml
@@ -0,0 +1,17 @@
+model:
+  tags: [dummy]
+
+slurm:
+  partition: test-partition
+  nodes: 2
+
+distributed:
+  enabled: true
+  launcher: torchrun
+  nnodes: 2
+  nproc_per_node: 4
+
+env_vars:
+  MY_VAR: test_value
+
+debug: true
diff --git a/tests/fixtures/dummy/data.json b/tests/fixtures/dummy/data.json
index 2c76f3df..5ca41a83 100644
--- a/tests/fixtures/dummy/data.json
+++ b/tests/fixtures/dummy/data.json
@@ -9,4 +9,4 @@
             "path": "/tmp/nonexistent"
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/fixtures/dummy/docker/dummy_sglang.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_sglang.ubuntu.amd.Dockerfile
index f45e5bc3..33086f8e 100644
--- a/tests/fixtures/dummy/docker/dummy_sglang.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_sglang.ubuntu.amd.Dockerfile
@@ -130,4 +130,3 @@ RUN echo "======================================================================
     echo "" && \
     echo "🚀 Ready for distributed LLM inference on AMD GPUs!" && \
     echo ""
-
diff --git a/tests/fixtures/dummy/docker/dummy_sglang_disagg.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_sglang_disagg.ubuntu.amd.Dockerfile
index 43d04337..67e4f5cb 100644
--- a/tests/fixtures/dummy/docker/dummy_sglang_disagg.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_sglang_disagg.ubuntu.amd.Dockerfile
@@ -183,4 +183,3 @@ RUN echo "======================================================================
     echo "   Note: This is a dummy/test image for madengine validation" && \
     echo "   For production: Use full Mooncake with RDMA support" && \
     echo ""
-
diff --git a/tests/fixtures/dummy/docker/dummy_therock.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_therock.ubuntu.amd.Dockerfile
index 16dda670..47fb3644 100644
--- a/tests/fixtures/dummy/docker/dummy_therock.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_therock.ubuntu.amd.Dockerfile
@@ -123,4 +123,3 @@ LABEL description="TheRock PyTorch Benchmark - The HIP Environment and ROCm Kit
 LABEL version="nightly"
 LABEL gpu_architecture="${MAD_SYSTEM_GPU_ARCHITECTURE}"
 LABEL components="core_runtime,hip_runtime,blas,prim,rand,pytorch"
-
diff --git a/tests/fixtures/dummy/docker/dummy_torchrun.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_torchrun.ubuntu.amd.Dockerfile
index e195b386..8aaccc86 100644
--- a/tests/fixtures/dummy/docker/dummy_torchrun.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_torchrun.ubuntu.amd.Dockerfile
@@ -43,4 +43,3 @@ RUN rocminfo > /dev/null 2>&1 || echo "ROCm info check failed (expected in non-G
 # Note: The K8s deployment config should override these env vars if needed:
 # - MIOPEN_FIND_MODE is already set in deployment_config.env_vars
 # - MIOPEN_USER_DB_PATH is already set in deployment_config.env_vars
-
diff --git a/tests/fixtures/dummy/docker/dummy_torchtitan.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_torchtitan.ubuntu.amd.Dockerfile
index 48deaa06..1d333905 100644
--- a/tests/fixtures/dummy/docker/dummy_torchtitan.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_torchtitan.ubuntu.amd.Dockerfile
@@ -69,4 +69,3 @@ RUN python3 -c "import torch; print(f'✓ PyTorch version: {torch.__version__}')
     rocminfo > /dev/null 2>&1 || echo "ROCm check (OK in build env)"
 
 WORKDIR /workspace
-
diff --git a/tests/fixtures/dummy/docker/dummy_vllm.ubuntu.amd.Dockerfile b/tests/fixtures/dummy/docker/dummy_vllm.ubuntu.amd.Dockerfile
index cf180092..480dfc12 100644
--- a/tests/fixtures/dummy/docker/dummy_vllm.ubuntu.amd.Dockerfile
+++ b/tests/fixtures/dummy/docker/dummy_vllm.ubuntu.amd.Dockerfile
@@ -93,4 +93,3 @@ RUN echo "=======================================" && \
     echo "PyTorch Version: $(python3 -c 'import torch; print(torch.__version__)')" && \
     echo "Build Type: Production (Real vLLM with ROCm)" && \
     echo "======================================="
-
diff --git a/tests/fixtures/dummy/models.json b/tests/fixtures/dummy/models.json
index 140779ab..48942f26 100644
--- a/tests/fixtures/dummy/models.json
+++ b/tests/fixtures/dummy/models.json
@@ -255,7 +255,7 @@
     "name": "dummy_torchrun",
     "dockerfile": "docker/dummy_torchrun",
     "scripts": "scripts/dummy_torchrun/run.sh",
-    "n_gpus": "1",
+    "n_gpus": "4",
     "owner": "mad.support@amd.com",
     "training_precision": "",
     "tags": [
@@ -268,7 +268,7 @@
     "name": "dummy_torchrun_multi",
     "dockerfile": "docker/dummy_torchrun",
     "scripts": "scripts/dummy_torchrun/run_multi.sh",
-    "n_gpus": "1",
+    "n_gpus": "4",
     "owner": "mad.support@amd.com",
     "training_precision": "",
     "tags": [
diff --git a/tests/fixtures/dummy/scripts/dummy/run.sh b/tests/fixtures/dummy/scripts/dummy/run.sh
index e5db9e7b..2c9893f7 100644
--- a/tests/fixtures/dummy/scripts/dummy/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "performance: $RANDOM samples_per_second"
diff --git a/tests/fixtures/dummy/scripts/dummy/run_cpu_bind.sh b/tests/fixtures/dummy/scripts/dummy/run_cpu_bind.sh
index 1206d310..f4e57df9 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_cpu_bind.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_cpu_bind.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 cpus=""
 if [ -f "/sys/fs/cgroup/cpuset/cpuset.cpus" ]; then
diff --git a/tests/fixtures/dummy/scripts/dummy/run_ctxtest.sh b/tests/fixtures/dummy/scripts/dummy/run_ctxtest.sh
index c69d8d1b..d6a43a86 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_ctxtest.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_ctxtest.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "performance: ${ctxtest} context"
diff --git a/tests/fixtures/dummy/scripts/dummy/run_data_aws.sh b/tests/fixtures/dummy/scripts/dummy/run_data_aws.sh
index ab0a8641..6a66f296 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_data_aws.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_data_aws.sh
@@ -6,5 +6,3 @@ else
     echo "${MAD_DATAHOME}/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx is NOT present"
     exit 1
 fi
-
-
diff --git a/tests/fixtures/dummy/scripts/dummy/run_data_local.sh b/tests/fixtures/dummy/scripts/dummy/run_data_local.sh
index 4c5efd5e..1d0f1286 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_data_local.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_data_local.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 mountCode=`mount | grep "${MAD_DATAHOME} "`
 if [ -z "$mountCode" ]; then
@@ -12,5 +12,3 @@ else
     echo "${MAD_DATAHOME} is mounted"
     echo "performance: $RANDOM samples_per_second"
 fi
-
-
diff --git a/tests/fixtures/dummy/scripts/dummy/run_data_minio.sh b/tests/fixtures/dummy/scripts/dummy/run_data_minio.sh
index ce697b39..d0e4caa0 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_data_minio.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_data_minio.sh
@@ -4,4 +4,4 @@ if [ -f "${MAD_DATAHOME}/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.
 else
     echo "${MAD_DATAHOME}/bert-large-uncased_L_24_H_1024_A_16_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx is NOT present"
     exit 1
-fi
\ No newline at end of file
+fi
diff --git a/tests/fixtures/dummy/scripts/dummy/run_data_nas.sh b/tests/fixtures/dummy/scripts/dummy/run_data_nas.sh
index 878d9330..e464ffd3 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_data_nas.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_data_nas.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 if [ -z ${MAD_DATAHOME+x} ]; then
     echo "MAD_DATAHOME is NOT set"
@@ -34,5 +34,3 @@ else
     echo "${MAD_DATAHOME} is mounted"
     echo "performance: $RANDOM samples_per_second"
 fi
-
-
diff --git a/tests/fixtures/dummy/scripts/dummy/run_gpu_bind.sh b/tests/fixtures/dummy/scripts/dummy/run_gpu_bind.sh
index db82af02..d27f651a 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_gpu_bind.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_gpu_bind.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 node_ids=()
 
diff --git a/tests/fixtures/dummy/scripts/dummy/run_multi.sh b/tests/fixtures/dummy/scripts/dummy/run_multi.sh
index 4bc59b36..ed10f5cc 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_multi.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_multi.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "model,temperature,performance,metric
 1,$RANDOM,$RANDOM,samples_per_sec
diff --git a/tests/fixtures/dummy/scripts/dummy/run_nccl_trace.sh b/tests/fixtures/dummy/scripts/dummy/run_nccl_trace.sh
index da2a8798..5eaa5f5e 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_nccl_trace.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_nccl_trace.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 python -c "import torch; import torch.distributed as dist; import os; os.environ['MASTER_ADDR'] = 'localhost'; os.environ['MASTER_PORT'] = '29501'; dist.init_process_group('nccl', rank=0, world_size=1);tensor = torch.arange(1, dtype=torch.int64).cuda(); dist.all_reduce(tensor, op=dist.ReduceOp.SUM); print(tensor[0]); "  | tee log.txt
- 
-echo "performance: 1 pass" 
+
+echo "performance: 1 pass"
diff --git a/tests/fixtures/dummy/scripts/dummy/run_prof.sh b/tests/fixtures/dummy/scripts/dummy/run_prof.sh
index 85a5a05f..b614e679 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_prof.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_prof.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 python -c "import torch; x = torch.ones(10,10).to('cuda'); l = torch.nn.Linear(10,30).cuda(); c = torch.nn.Conv2d(1, 20, 3).cuda(); out1 = l(x); out1 = out1[None, None, :, :] ; out2 = c(out1); print( 'performance=' + str(torch.cuda.memory_allocated(0)) )"  | tee log.txt
- 
+
 performance=$(grep -o "performance=[0-9]*" log.txt | tail -n 1 | sed 's/performance=//')
-echo "performance: $performance bytes" 
+echo "performance: $performance bytes"
diff --git a/tests/fixtures/dummy/scripts/dummy/run_sleep.sh b/tests/fixtures/dummy/scripts/dummy/run_sleep.sh
index 22b1c179..ec4f6c6f 100644
--- a/tests/fixtures/dummy/scripts/dummy/run_sleep.sh
+++ b/tests/fixtures/dummy/scripts/dummy/run_sleep.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 sleep $1
 echo "performance: $RANDOM samples_per_second"
diff --git a/tests/fixtures/dummy/scripts/dummy2/models.json b/tests/fixtures/dummy/scripts/dummy2/models.json
index de114986..75ae208b 100644
--- a/tests/fixtures/dummy/scripts/dummy2/models.json
+++ b/tests/fixtures/dummy/scripts/dummy2/models.json
@@ -25,4 +25,4 @@
         ],
         "args": ""
       }
-]
\ No newline at end of file
+]
diff --git a/tests/fixtures/dummy/scripts/dummy2/run.sh b/tests/fixtures/dummy/scripts/dummy2/run.sh
index e5db9e7b..2c9893f7 100644
--- a/tests/fixtures/dummy/scripts/dummy2/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy2/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "performance: $RANDOM samples_per_second"
diff --git a/tests/fixtures/dummy/scripts/dummy3/get_models_json.py b/tests/fixtures/dummy/scripts/dummy3/get_models_json.py
index 425a0b19..6c0c857d 100644
--- a/tests/fixtures/dummy/scripts/dummy3/get_models_json.py
+++ b/tests/fixtures/dummy/scripts/dummy3/get_models_json.py
@@ -4,6 +4,7 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+
 from madengine.utils.discover_models import CustomModel
 
 Model3Data = CustomModel(
@@ -18,20 +19,20 @@
     multiple_results="",
 )
 
+
 class Dummy3CustomModel(CustomModel):
     def update_model(self):
-        self.dockerfile="docker/dummy"
-        self.scripts="run.sh"
+        self.dockerfile = "docker/dummy"
+        self.scripts = "run.sh"
         self.n_gpus = "-1"
         self.owner = "mad.support@amd.com"
         self.training_precision = ""
         self.args = ""
         self.multiple_results = ""
 
-Model4Data = Dummy3CustomModel(
-    name="model4",
-    tags = ["dummies", "dummy_test_group_3"]
-)
+
+Model4Data = Dummy3CustomModel(name="model4", tags=["dummies", "dummy_test_group_3"])
+
 
 def list_models():
-    return [Model3Data, Model4Data]
\ No newline at end of file
+    return [Model3Data, Model4Data]
diff --git a/tests/fixtures/dummy/scripts/dummy3/run.sh b/tests/fixtures/dummy/scripts/dummy3/run.sh
index e5db9e7b..2c9893f7 100644
--- a/tests/fixtures/dummy/scripts/dummy3/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy3/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
-# 
+#
 # Copyright (c) Advanced Micro Devices, Inc.
 # All rights reserved.
-# 
+#
 
 echo "performance: $RANDOM samples_per_second"
diff --git a/tests/fixtures/dummy/scripts/dummy_deepspeed/run.sh b/tests/fixtures/dummy/scripts/dummy_deepspeed/run.sh
index aa86bc85..8d3ae199 100644
--- a/tests/fixtures/dummy/scripts/dummy_deepspeed/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy_deepspeed/run.sh
@@ -29,4 +29,3 @@ $LAUNCHER_CMD run_deepspeed.py --deepspeed_config ds_config.json
 echo "========================================================================"
 echo "Training script completed"
 echo "========================================================================"
-
diff --git a/tests/fixtures/dummy/scripts/dummy_deepspeed/run_deepspeed.py b/tests/fixtures/dummy/scripts/dummy_deepspeed/run_deepspeed.py
index 7851597f..1d0bef66 100755
--- a/tests/fixtures/dummy/scripts/dummy_deepspeed/run_deepspeed.py
+++ b/tests/fixtures/dummy/scripts/dummy_deepspeed/run_deepspeed.py
@@ -12,15 +12,16 @@
   deepspeed --num_gpus=2 run_deepspeed.py
 """
 
+import argparse
 import os
+import socket
 import sys
 import time
-import socket
-import argparse
+
+import deepspeed
 import torch
-import torch.nn as nn
 import torch.distributed as dist
-import deepspeed
+import torch.nn as nn
 
 # Configuration
 NUM_EPOCHS = 3
@@ -28,8 +29,10 @@
 IMAGE_SIZE = 224
 NUM_CLASSES = 1000
 
+
 class SimpleModel(nn.Module):
     """Simple model for DeepSpeed testing"""
+
     def __init__(self, num_classes=1000):
         super().__init__()
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
@@ -41,7 +44,7 @@ def __init__(self, num_classes=1000):
         self.bn3 = nn.BatchNorm2d(256)
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(256, num_classes)
-    
+
     def forward(self, x):
         x = self.pool(torch.relu(self.bn1(self.conv1(x))))
         x = self.pool(torch.relu(self.bn2(self.conv2(x))))
@@ -50,11 +53,12 @@ def forward(self, x):
         x = torch.flatten(x, 1)
         return self.fc(x)
 
+
 def print_header(args):
     rank = int(os.environ.get("RANK", 0))
     local_rank = int(os.environ.get("LOCAL_RANK", 0))
     world_size = int(os.environ.get("WORLD_SIZE", 1))
-    
+
     if rank == 0:
         print("=" * 70)
         print("DeepSpeed Distributed Training Benchmark")
@@ -65,71 +69,75 @@ def print_header(args):
         print(f"Training: {NUM_EPOCHS} epochs, {NUM_BATCHES} batches/epoch")
         print("=" * 70)
 
+
 def train_epoch(model_engine, criterion, epoch):
     model_engine.train()
     start_time = time.time()
     total_loss = 0
-    
+
     local_rank = model_engine.local_rank
     micro_batch_size = model_engine.train_micro_batch_size_per_gpu()
-    
+
     for batch_idx in range(NUM_BATCHES):
         # Synthetic data
         inputs = torch.randn(
-            micro_batch_size, 3, IMAGE_SIZE, IMAGE_SIZE,
-            device=model_engine.device
+            micro_batch_size, 3, IMAGE_SIZE, IMAGE_SIZE, device=model_engine.device
         )
         labels = torch.randint(
-            0, NUM_CLASSES, (micro_batch_size,),
-            device=model_engine.device
+            0, NUM_CLASSES, (micro_batch_size,), device=model_engine.device
         )
-        
+
         # Forward pass
         outputs = model_engine(inputs)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (DeepSpeed handles gradients, optimization)
         model_engine.backward(loss)
         model_engine.step()
-        
+
         total_loss += loss.item()
-        
+
         if local_rank == 0 and (batch_idx + 1) % 10 == 0:
-            print(f"Epoch [{epoch+1}] Batch [{batch_idx+1}/{NUM_BATCHES}] Loss: {loss.item():.4f}")
-    
+            print(
+                f"Epoch [{epoch+1}] Batch [{batch_idx+1}/{NUM_BATCHES}] Loss: {loss.item():.4f}"
+            )
+
     epoch_time = time.time() - start_time
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # Calculate node-local throughput
     # Get local world size (GPUs per node)
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
-    
+
     # Node throughput = samples processed by all GPUs on this node
     node_throughput = (NUM_BATCHES * micro_batch_size * local_world_size) / epoch_time
-    
+
     return avg_loss, node_throughput
 
+
 def main():
     # Start timer for total test duration
     test_start_time = time.time()
-    
+
     # Parse DeepSpeed args
     parser = argparse.ArgumentParser()
     # local_rank default should come from environment (set by torchrun)
-    parser.add_argument('--local_rank', type=int, default=int(os.environ.get('LOCAL_RANK', 0)))
-    parser.add_argument('--deepspeed_config', type=str, default='ds_config.json')
+    parser.add_argument(
+        "--local_rank", type=int, default=int(os.environ.get("LOCAL_RANK", 0))
+    )
+    parser.add_argument("--deepspeed_config", type=str, default="ds_config.json")
     args = parser.parse_args()
-    
+
     # Handle config file path - supports multiple locations for K8s/local execution
     config_found = False
     original_config_path = args.deepspeed_config
     script_dir = os.path.dirname(os.path.abspath(__file__))
-    
+
     # Try 1: Check as-is (current directory or absolute path)
     if os.path.exists(args.deepspeed_config):
         config_found = True
         print(f"[Config] Found DeepSpeed config: {args.deepspeed_config}")
-    
+
     # Try 2: Check relative to script directory (for K8s execution)
     if not config_found:
         config_path = os.path.join(script_dir, args.deepspeed_config)
@@ -137,80 +145,86 @@ def main():
             args.deepspeed_config = config_path
             config_found = True
             print(f"[Config] Found DeepSpeed config in script directory: {config_path}")
-    
+
     # Try 3: Check in scripts/dummy_deepspeed/ directory (for local execution)
     if not config_found:
-        local_config_path = os.path.join('scripts/dummy_deepspeed', args.deepspeed_config)
+        local_config_path = os.path.join(
+            "scripts/dummy_deepspeed", args.deepspeed_config
+        )
         if os.path.exists(local_config_path):
             args.deepspeed_config = local_config_path
             config_found = True
-            print(f"[Config] Found DeepSpeed config in scripts directory: {local_config_path}")
-    
+            print(
+                f"[Config] Found DeepSpeed config in scripts directory: {local_config_path}"
+            )
+
     # Error if not found
     if not config_found:
         print(f"\n❌ Error: DeepSpeed config not found!")
         print(f"Searched for: {original_config_path}")
         print(f"Locations tried:")
         print(f"  1. Current directory: {os.getcwd()}/{original_config_path}")
-        print(f"  2. Script directory: {os.path.join(script_dir, original_config_path)}")
+        print(
+            f"  2. Script directory: {os.path.join(script_dir, original_config_path)}"
+        )
         print(f"  3. Scripts directory: scripts/dummy_deepspeed/{original_config_path}")
         print(f"\nCurrent directory: {os.getcwd()}")
         print(f"Files in current directory:")
         try:
-            for f in os.listdir('.'):
+            for f in os.listdir("."):
                 print(f"  - {f}")
         except Exception as e:
             print(f"  (Cannot list: {e})")
         print(f"\nScript location: {os.path.abspath(__file__)}")
         sys.exit(1)
-    
+
     print_header(args)
-    
+
     # Initialize PyTorch distributed backend BEFORE DeepSpeed
     # This prevents DeepSpeed from trying to use MPI
     if not dist.is_initialized():
         dist.init_process_group(backend="nccl")
         print(f"✓ PyTorch distributed initialized (backend: nccl)")
-    
+
     # Create model
     model = SimpleModel(NUM_CLASSES)
-    
+
     # Initialize DeepSpeed
     # Note: When using deepspeed launcher with --deepspeed_config arg,
     # do NOT pass config parameter to initialize() - it causes a conflict
     model_engine, optimizer, _, _ = deepspeed.initialize(
-        args=args,
-        model=model,
-        model_parameters=model.parameters()
+        args=args, model=model, model_parameters=model.parameters()
     )
-    
+
     criterion = nn.CrossEntropyLoss()
-    
+
     rank = model_engine.local_rank
-    
+
     if rank == 0:
         print(f"\n✓ DeepSpeed initialized")
         print(f"  ZeRO Stage: {model_engine.zero_optimization_stage()}")
         print(f"  Micro Batch Size: {model_engine.train_micro_batch_size_per_gpu()}")
         print(f"  Gradient Accumulation: {model_engine.gradient_accumulation_steps()}")
         print(f"\nStarting training...\n")
-    
+
     # Get topology information
     rank = int(os.environ.get("RANK", 0))
     local_rank = model_engine.local_rank
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
     world_size = model_engine.world_size
     node_rank = rank // local_world_size if local_world_size > 0 else 0
-    
+
     # Training loop
     all_throughputs = []
     for epoch in range(NUM_EPOCHS):
         avg_loss, node_throughput = train_epoch(model_engine, criterion, epoch)
         all_throughputs.append(node_throughput)
-        
+
         if local_rank == 0:
-            print(f"\n[Node {node_rank}] Epoch {epoch+1} Complete: Loss={avg_loss:.4f}, Node Throughput={node_throughput:.2f} samples/sec\n")
-    
+            print(
+                f"\n[Node {node_rank}] Epoch {epoch+1} Complete: Loss={avg_loss:.4f}, Node Throughput={node_throughput:.2f} samples/sec\n"
+            )
+
     # ========================================================================
     # Node-Local Performance Reporting (NEW - Best Practice)
     # Each node reports its OWN performance
@@ -226,24 +240,26 @@ def main():
         print(f"Node Throughput: {avg_node_throughput:.2f} samples_per_second")
         print(f"ZeRO Stage: {model_engine.zero_optimization_stage()}")
         print(f"{'='*70}")
-        
+
         # CRITICAL: Standard output format for madengine parsing
         print(f"\nperformance: {avg_node_throughput:.2f} samples_per_second")
         print(f"node_id: {node_rank}")
         print(f"local_gpus: {local_world_size}")
         print(f"deepspeed_config: ZeRO_stage={model_engine.zero_optimization_stage()}")
-        
+
         # Calculate and print test duration
         test_duration = time.time() - test_start_time
         print(f"test_duration: {test_duration:.2f}s")
-    
+
     return 0
 
+
 if __name__ == "__main__":
     try:
         sys.exit(main())
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_megatron_lm/run_megatron.py b/tests/fixtures/dummy/scripts/dummy_megatron_lm/run_megatron.py
index 70265702..f293c386 100755
--- a/tests/fixtures/dummy/scripts/dummy_megatron_lm/run_megatron.py
+++ b/tests/fixtures/dummy/scripts/dummy_megatron_lm/run_megatron.py
@@ -16,9 +16,10 @@
 """
 
 import os
+import socket
 import sys
 import time
-import socket
+
 import torch
 import torch.nn as nn
 
@@ -26,12 +27,13 @@
 try:
     from megatron.core import mpu, tensor_parallel
     from megatron.core.parallel_state import (
-        initialize_model_parallel,
         destroy_model_parallel,
-        get_tensor_model_parallel_world_size,
-        get_pipeline_model_parallel_world_size,
         get_data_parallel_world_size,
+        get_pipeline_model_parallel_world_size,
+        get_tensor_model_parallel_world_size,
+        initialize_model_parallel,
     )
+
     MEGATRON_AVAILABLE = True
 except ImportError:
     MEGATRON_AVAILABLE = False
@@ -57,6 +59,7 @@
 pipeline_model_parallel_size = int(os.environ.get("PIPELINE_MODEL_PARALLEL_SIZE", 1))
 context_parallel_size = int(os.environ.get("CONTEXT_PARALLEL_SIZE", 1))
 
+
 def print_header(tp_size, pp_size, dp_size):
     """Print training configuration header"""
     print("=" * 70)
@@ -77,110 +80,118 @@ def print_header(tp_size, pp_size, dp_size):
     print(f"  Hidden Size: {HIDDEN_SIZE}")
     print("=" * 70)
 
+
 class SimpleMegatronModel(nn.Module):
     """
     Simplified model using Megatron-style patterns.
     In production, use megatron.core.models for actual transformer implementations.
     """
+
     def __init__(self, hidden_size, num_classes):
         super().__init__()
         self.embedding = nn.Linear(SEQ_LENGTH, hidden_size)
-        
+
         # Simple transformer layers
         self.transformer = nn.TransformerEncoder(
             nn.TransformerEncoderLayer(
                 d_model=hidden_size,
                 nhead=8,
                 dim_feedforward=hidden_size * 4,
-                batch_first=True
+                batch_first=True,
             ),
-            num_layers=6
+            num_layers=6,
         )
         self.classifier = nn.Linear(hidden_size, num_classes)
-    
+
     def forward(self, x):
         x = self.embedding(x)
         x = self.transformer(x)
         x = x.mean(dim=1)  # Global pooling
         return self.classifier(x)
 
+
 def train_epoch(model, optimizer, criterion, epoch, device, local_dp_size):
     """Training loop for one epoch with node-local throughput"""
     model.train()
     start_time = time.time()
     total_loss = 0
-    
+
     for batch_idx in range(NUM_BATCHES):
         # Generate synthetic data
         inputs = torch.randn(BATCH_SIZE, 1, SEQ_LENGTH, device=device)
         labels = torch.randint(0, NUM_CLASSES, (BATCH_SIZE,), device=device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(inputs)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass
         loss.backward()
-        
+
         # Optimizer step
         optimizer.step()
-        
+
         total_loss += loss.item()
-        
+
         # Log progress from local_rank 0
         if local_rank == 0 and (batch_idx + 1) % 10 == 0:
-            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
-                  f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
-                  f"Loss: {loss.item():.4f}")
-    
+            print(
+                f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
+                f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
+                f"Loss: {loss.item():.4f}"
+            )
+
     epoch_time = time.time() - start_time
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # Calculate node-local throughput
     # local_dp_size = data parallel size on this node
     node_throughput = (NUM_BATCHES * BATCH_SIZE * local_dp_size) / epoch_time
-    
+
     return avg_loss, node_throughput
 
+
 def main():
     """Main training function using Megatron-Core"""
     # Start timer for total test duration
     test_start_time = time.time()
-    
+
     # Set device
     device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu")
     if torch.cuda.is_available():
         torch.cuda.set_device(device)
-    
+
     # Initialize distributed and model parallelism
     if MEGATRON_AVAILABLE and world_size > 1:
         # Initialize with Megatron-Core
         if rank == 0:
             print(f"[Rank {rank}] Initializing Megatron-Core model parallelism...")
-        
+
         torch.distributed.init_process_group(backend="nccl", init_method="env://")
-        
+
         # Initialize Megatron model parallel groups
         initialize_model_parallel(
             tensor_model_parallel_size=tensor_model_parallel_size,
             pipeline_model_parallel_size=pipeline_model_parallel_size,
             context_parallel_size=context_parallel_size,
         )
-        
+
         # Get actual parallel sizes from Megatron-Core
         tp_size = get_tensor_model_parallel_world_size()
         pp_size = get_pipeline_model_parallel_world_size()
         dp_size = get_data_parallel_world_size()
-        
+
         if rank == 0:
             print(f"[Rank {rank}] ✓ Megatron-Core initialized")
             print(f"[Rank {rank}]   TP={tp_size}, PP={pp_size}, DP={dp_size}")
-    
+
     elif world_size > 1:
         # Fallback to basic DDP
         if rank == 0:
-            print(f"[Rank {rank}] Using basic PyTorch DDP (Megatron-Core not available)")
+            print(
+                f"[Rank {rank}] Using basic PyTorch DDP (Megatron-Core not available)"
+            )
         torch.distributed.init_process_group(backend="nccl", init_method="env://")
         tp_size = 1
         pp_size = 1
@@ -190,45 +201,50 @@ def main():
         tp_size = 1
         pp_size = 1
         dp_size = 1
-    
+
     # Print configuration
     print_header(tp_size, pp_size, dp_size)
-    
+
     if torch.cuda.is_available():
         print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}")
-    
+
     # Create model
     model = SimpleMegatronModel(HIDDEN_SIZE, NUM_CLASSES).to(device)
-    
+
     # Wrap with DDP if needed (in production, use Megatron's model wrappers)
     if world_size > 1 and not MEGATRON_AVAILABLE:
         from torch.nn.parallel import DistributedDataParallel as DDP
+
         model = DDP(model, device_ids=[local_rank], output_device=local_rank)
-    
+
     # Optimizer and loss
     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
     criterion = nn.CrossEntropyLoss()
-    
+
     # Get local world size and node rank
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
     node_rank = rank // local_world_size if local_world_size > 0 else 0
-    
+
     # Calculate local data parallel size (DP ranks on this node)
     # In Megatron: DP = world_size / (TP * PP * CP)
     # For simplicity, assume local_dp_size proportional to local_world_size
-    local_dp_size = dp_size // (world_size // local_world_size) if (world_size // local_world_size) > 0 else dp_size
+    local_dp_size = (
+        dp_size // (world_size // local_world_size)
+        if (world_size // local_world_size) > 0
+        else dp_size
+    )
     if local_dp_size < 1:
         local_dp_size = 1
-    
+
     # Synchronize before training
     if world_size > 1:
         torch.distributed.barrier()
-    
+
     if local_rank == 0:
         print(f"\n{'='*70}")
         print(f"[Node {node_rank}] Starting Training")
         print(f"{'='*70}\n")
-    
+
     # Training loop
     all_throughputs = []
     for epoch in range(NUM_EPOCHS):
@@ -236,12 +252,12 @@ def main():
             model, optimizer, criterion, epoch, device, local_dp_size
         )
         all_throughputs.append(node_throughput)
-        
+
         if local_rank == 0:
             print(f"\n[Node {node_rank}] Epoch {epoch+1}/{NUM_EPOCHS} Complete:")
             print(f"  Loss: {avg_loss:.4f}")
             print(f"  Node Throughput: {node_throughput:.2f} samples/sec\n")
-    
+
     # ========================================================================
     # Node-Local Performance Reporting (NEW - Best Practice)
     # ========================================================================
@@ -260,33 +276,37 @@ def main():
         print(f"  Context Parallel (CP): {context_parallel_size}")
         print(f"  Data Parallel (DP): {dp_size}")
         print(f"{'='*70}")
-        
+
         # CRITICAL: Standard output format for madengine parsing
         print(f"\nperformance: {avg_node_throughput:.2f} samples_per_second")
         print(f"node_id: {node_rank}")
         print(f"local_gpus: {local_world_size}")
-        print(f"megatron_config: TP={tp_size} PP={pp_size} CP={context_parallel_size} DP={dp_size}")
-        
+        print(
+            f"megatron_config: TP={tp_size} PP={pp_size} CP={context_parallel_size} DP={dp_size}"
+        )
+
         # Calculate and print test duration
         test_duration = time.time() - test_start_time
         print(f"test_duration: {test_duration:.2f}s")
-    
+
     # Cleanup
     if MEGATRON_AVAILABLE and world_size > 1:
         destroy_model_parallel()
-    
+
     if world_size > 1:
         torch.distributed.destroy_process_group()
         if rank == 0:
             print(f"\n✓ Distributed cleanup complete")
-    
+
     return 0
 
+
 if __name__ == "__main__":
     try:
         sys.exit(main())
     except Exception as e:
         print(f"[Rank {rank}] Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_sglang/run_sglang_inference.py b/tests/fixtures/dummy/scripts/dummy_sglang/run_sglang_inference.py
index be68a74d..b5c6fa74 100644
--- a/tests/fixtures/dummy/scripts/dummy_sglang/run_sglang_inference.py
+++ b/tests/fixtures/dummy/scripts/dummy_sglang/run_sglang_inference.py
@@ -12,11 +12,11 @@
   Multi-node: One serve per node (TP only on that node), nnodes=1 per process
 """
 
+import argparse
 import os
+import socket
 import sys
 import time
-import argparse
-import socket
 from typing import List, Optional
 
 # Configure environment before importing SGLang
@@ -78,26 +78,26 @@ def generate_prompts(num_prompts: int) -> List[str]:
 def run_inference_sglang(args):
     """
     Run SGLang inference using native Runtime API.
-    
+
     SGLang handles distributed setup automatically via Ray.
     No torchrun needed!
     """
     print("\n" + "=" * 70)
     print("Initializing SGLang Runtime")
     print("=" * 70)
-    
+
     try:
         # Initialize SGLang runtime
         # SGLang automatically handles multi-node setup via Ray
         # when appropriate environment variables are set
-        
+
         runtime_config = {
             "model_path": args.model,
             "tp_size": args.tp_size,
             "trust_remote_code": True,
             "mem_fraction_static": 0.90,
         }
-        
+
         # For multi-node, set Ray init address
         if args.nnodes > 1:
             runtime_config["nccl_init_addr"] = f"{args.master_addr}:{args.master_port}"
@@ -106,19 +106,19 @@ def run_inference_sglang(args):
             print(f"Multi-node setup: {args.nnodes} nodes, rank {args.node_rank}")
         else:
             print(f"Single-node setup: {args.tp_size} GPUs")
-        
+
         # Initialize runtime
         runtime = sgl.Runtime(**runtime_config)
         print("✓ SGLang runtime initialized successfully")
-        
+
     except Exception as e:
         print(f"✗ Failed to initialize SGLang runtime: {e}")
         print("\n⚠️  Falling back to mock inference for testing...")
         return run_inference_mock(args)
-    
+
     # Generate prompts
     prompts = generate_prompts(NUM_PROMPTS)
-    
+
     # Warmup
     print("\nWarmup: Running 10 prompts...")
     warmup_prompts = prompts[:10]
@@ -129,16 +129,16 @@ def run_inference_sglang(args):
                 "max_new_tokens": MAX_TOKENS,
                 "temperature": TEMPERATURE,
                 "top_p": TOP_P,
-            }
+            },
         )
         print("✓ Warmup complete")
     except Exception as e:
         print(f"⚠️  Warmup failed: {e}")
-    
+
     # Benchmark
     print(f"\nBenchmark: Running {NUM_PROMPTS} prompts...")
     start_time = time.time()
-    
+
     try:
         outputs = runtime.generate(
             prompts,
@@ -146,17 +146,19 @@ def run_inference_sglang(args):
                 "max_new_tokens": MAX_TOKENS,
                 "temperature": TEMPERATURE,
                 "top_p": TOP_P,
-            }
+            },
         )
-        
+
         end_time = time.time()
         elapsed_time = end_time - start_time
-        
+
         # Calculate metrics
-        total_tokens = sum(len(output["meta_info"]["completion_tokens"]) for output in outputs)
+        total_tokens = sum(
+            len(output["meta_info"]["completion_tokens"]) for output in outputs
+        )
         throughput = NUM_PROMPTS / elapsed_time
         tokens_per_second = total_tokens / elapsed_time
-        
+
         # Print results
         print(f"\n{'=' * 70}")
         print("Benchmark Results")
@@ -167,7 +169,7 @@ def run_inference_sglang(args):
         print(f"Token generation: {tokens_per_second:.2f} tokens/second")
         print(f"Average latency: {(elapsed_time / NUM_PROMPTS) * 1000:.2f} ms/request")
         print("=" * 70)
-        
+
         # Print sample outputs
         print("\n" + "=" * 70)
         print("Sample Outputs (first 3)")
@@ -177,22 +179,23 @@ def run_inference_sglang(args):
             generated_text = output["text"]
             print(f"\n[Prompt {i+1}]: {prompt}")
             print(f"[Output {i+1}]: {generated_text[:200]}...")
-        
+
         # madengine output format
         print(f"\nperformance: {throughput:.2f} requests_per_second")
         print(f"tokens_per_second: {tokens_per_second:.2f}")
         print(f"model: {args.model}")
         print(f"tp_size: {args.tp_size}")
         print(f"nnodes: {args.nnodes}")
-        
+
         # Cleanup
         runtime.shutdown()
-        
+
         return 0
-        
+
     except Exception as e:
         print(f"✗ Inference failed: {e}")
         import traceback
+
         traceback.print_exc()
         print("\n⚠️  Falling back to mock inference...")
         return run_inference_mock(args)
@@ -207,35 +210,35 @@ def run_inference_mock(args):
     print("=" * 70)
     print("This simulates SGLang inference for testing madengine infrastructure.")
     print("=" * 70)
-    
+
     # Simulate initialization
     print("\nInitializing mock SGLang runtime...")
     time.sleep(1)
     print("✓ Mock runtime initialized")
-    
+
     # Generate prompts
     prompts = generate_prompts(NUM_PROMPTS)
-    
+
     # Warmup
     print("\nWarmup: Running 10 prompts...")
     time.sleep(0.5)
     print("✓ Warmup complete")
-    
+
     # Benchmark
     print(f"\nBenchmark: Running {NUM_PROMPTS} prompts...")
     start_time = time.time()
-    
+
     # Simulate inference
     time.sleep(2.0)
-    
+
     end_time = time.time()
     elapsed_time = end_time - start_time
-    
+
     # Mock metrics
     total_tokens = NUM_PROMPTS * MAX_TOKENS
     throughput = NUM_PROMPTS / elapsed_time
     tokens_per_second = total_tokens / elapsed_time
-    
+
     # Print results
     print(f"\n{'=' * 70}")
     print("Benchmark Results (Mock)")
@@ -246,7 +249,7 @@ def run_inference_mock(args):
     print(f"Token generation: {tokens_per_second:.2f} tokens/second")
     print(f"Average latency: {(elapsed_time / NUM_PROMPTS) * 1000:.2f} ms/request")
     print("=" * 70)
-    
+
     # Print sample outputs
     print("\n" + "=" * 70)
     print("Sample Outputs (Mock - first 3)")
@@ -254,14 +257,14 @@ def run_inference_mock(args):
     for i in range(3):
         print(f"\n[Prompt {i+1}]: {prompts[i]}")
         print(f"[Output {i+1}]: [Mock generated text for infrastructure testing...]")
-    
+
     # madengine output format
     print(f"\nperformance: {throughput:.2f} requests_per_second")
     print(f"tokens_per_second: {tokens_per_second:.2f}")
     print(f"model: {args.model}")
     print(f"tp_size: {args.tp_size}")
     print(f"nnodes: {args.nnodes}")
-    
+
     return 0
 
 
@@ -274,62 +277,56 @@ def main():
         "--model",
         type=str,
         default=DEFAULT_MODEL,
-        help=f"Model name or path (default: {DEFAULT_MODEL})"
+        help=f"Model name or path (default: {DEFAULT_MODEL})",
     )
     parser.add_argument(
         "--tp-size",
         type=int,
         default=1,
-        help="Tensor parallel size (GPUs per node, default: 1)"
+        help="Tensor parallel size (GPUs per node, default: 1)",
     )
     parser.add_argument(
-        "--nnodes",
-        type=int,
-        default=1,
-        help="Number of nodes (default: 1)"
+        "--nnodes", type=int, default=1, help="Number of nodes (default: 1)"
     )
     parser.add_argument(
-        "--node-rank",
-        type=int,
-        default=0,
-        help="Node rank (0-indexed, default: 0)"
+        "--node-rank", type=int, default=0, help="Node rank (0-indexed, default: 0)"
     )
     parser.add_argument(
         "--master-addr",
         type=str,
         default="localhost",
-        help="Master node address (default: localhost)"
+        help="Master node address (default: localhost)",
     )
     parser.add_argument(
         "--master-port",
         type=int,
         default=29500,
-        help="Master communication port (default: 29500)"
+        help="Master communication port (default: 29500)",
     )
     parser.add_argument(
         "--mock-only",
         action="store_true",
-        help="Force mock inference (skip real SGLang)"
+        help="Force mock inference (skip real SGLang)",
     )
-    
+
     args = parser.parse_args()
-    
+
     # Validate arguments
     if args.tp_size < 1:
         print("Error: tp-size must be >= 1")
         return 1
-    
+
     if args.nnodes < 1:
         print("Error: nnodes must be >= 1")
         return 1
-    
+
     if args.node_rank < 0 or args.node_rank >= args.nnodes:
         print(f"Error: node-rank must be in range [0, {args.nnodes-1}]")
         return 1
-    
+
     # Print configuration
     print_header(args)
-    
+
     # Run inference
     if args.mock_only:
         return run_inference_mock(args)
@@ -346,5 +343,6 @@ def main():
     except Exception as e:
         print(f"\nError: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/requirements.txt b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/requirements.txt
index 25f8ad69..95ffe1d7 100644
--- a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/requirements.txt
+++ b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/requirements.txt
@@ -1,3 +1,2 @@
 # Minimal requirements for dummy test
 # No actual SGLang needed - this is a simulation
-
diff --git a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run.sh b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run.sh
index 9661fc17..5d24f16e 100755
--- a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run.sh
@@ -14,7 +14,7 @@ if [ "${SGLANG_DISAGG_MODE:-}" = "enabled" ]; then
     echo "  Node Rank: ${SGLANG_NODE_RANK:-unknown}"
     echo "  Prefill Nodes: ${SGLANG_DISAGG_PREFILL_NODES:-unknown}"
     echo "  Decode Nodes: ${SGLANG_DISAGG_DECODE_NODES:-unknown}"
-    
+
     # Run Python script that handles node roles
     python3 run_sglang_disagg_inference.py
 else
@@ -26,4 +26,3 @@ fi
 echo "============================================"
 echo "✓ SGLang Disagg Test Complete"
 echo "============================================"
-
diff --git a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run_sglang_disagg_inference.py b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run_sglang_disagg_inference.py
index 94b476b6..7e65d235 100755
--- a/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run_sglang_disagg_inference.py
+++ b/tests/fixtures/dummy/scripts/dummy_sglang_disagg/run_sglang_disagg_inference.py
@@ -8,9 +8,9 @@
 """
 
 import os
+import socket
 import sys
 import time
-import socket
 from typing import Optional
 
 
@@ -48,23 +48,23 @@ def simulate_proxy_node(info: dict):
     print(f"Prefill Nodes: {info['prefill_nodes']}")
     print(f"Decode Nodes: {info['decode_nodes']}")
     print("-" * 60)
-    
+
     print("\n[Proxy] Initializing load balancer...")
     time.sleep(1)
-    
+
     print("[Proxy] Waiting for prefill nodes to be ready...")
-    for i in range(1, info['prefill_nodes'] + 1):
+    for i in range(1, info["prefill_nodes"] + 1):
         print(f"  ✓ Prefill node {i} connected")
         time.sleep(0.5)
-    
+
     print("[Proxy] Waiting for decode nodes to be ready...")
-    for i in range(info['prefill_nodes'] + 1, info['total_nodes']):
+    for i in range(info["prefill_nodes"] + 1, info["total_nodes"]):
         print(f"  ✓ Decode node {i} connected")
         time.sleep(0.5)
-    
+
     print("\n[Proxy] All nodes connected. Load balancer ready!")
     print("[Proxy] Simulating request routing...")
-    
+
     # Simulate some requests
     for req_id in range(1, 4):
         print(f"\n[Proxy] Request {req_id}:")
@@ -72,10 +72,12 @@ def simulate_proxy_node(info: dict):
         time.sleep(0.3)
         print(f"  → KV cache transferred via Mooncake")
         time.sleep(0.3)
-        print(f"  → Routing to decode node {info['prefill_nodes'] + ((req_id % info['decode_nodes']) + 1)}")
+        print(
+            f"  → Routing to decode node {info['prefill_nodes'] + ((req_id % info['decode_nodes']) + 1)}"
+        )
         time.sleep(0.3)
         print(f"  ✓ Request {req_id} completed")
-    
+
     print("\n[Proxy] Test complete. Shutting down...")
 
 
@@ -89,18 +91,18 @@ def simulate_prefill_node(info: dict):
     print(f"Tensor Parallel Size: {info['tp_size']}")
     print(f"Role: Prompt Processing")
     print("-" * 60)
-    
+
     print("\n[Prefill] Initializing prefill server...")
     time.sleep(1)
-    
+
     print("[Prefill] Loading model shards...")
-    for shard in range(info['tp_size']):
+    for shard in range(info["tp_size"]):
         print(f"  ✓ Shard {shard + 1}/{info['tp_size']} loaded")
         time.sleep(0.3)
-    
+
     print("\n[Prefill] Server ready. Listening for requests...")
     time.sleep(1)
-    
+
     print("[Prefill] Processing prompts...")
     for batch in range(1, 4):
         print(f"\n[Prefill] Batch {batch}:")
@@ -111,7 +113,7 @@ def simulate_prefill_node(info: dict):
         print(f"  → Transferring KV cache via Mooncake...")
         time.sleep(0.3)
         print(f"  ✓ Batch {batch} complete")
-    
+
     print("\n[Prefill] Test complete. Shutting down...")
 
 
@@ -125,18 +127,18 @@ def simulate_decode_node(info: dict):
     print(f"Tensor Parallel Size: {info['tp_size']}")
     print(f"Role: Token Generation")
     print("-" * 60)
-    
+
     print("\n[Decode] Initializing decode server...")
     time.sleep(1)
-    
+
     print("[Decode] Loading model shards...")
-    for shard in range(info['tp_size']):
+    for shard in range(info["tp_size"]):
         print(f"  ✓ Shard {shard + 1}/{info['tp_size']} loaded")
         time.sleep(0.3)
-    
+
     print("\n[Decode] Server ready. Listening for KV caches...")
     time.sleep(1)
-    
+
     print("[Decode] Generating tokens...")
     for batch in range(1, 4):
         print(f"\n[Decode] Batch {batch}:")
@@ -148,7 +150,7 @@ def simulate_decode_node(info: dict):
             time.sleep(0.2)
         print(f"    ✓ Generated 5 tokens")
         print(f"  ✓ Batch {batch} complete")
-    
+
     print("\n[Decode] Test complete. Shutting down...")
 
 
@@ -157,22 +159,24 @@ def main():
     print("\n" + "=" * 60)
     print("SGLang Disaggregated Inference Simulation")
     print("=" * 60 + "\n")
-    
+
     # Get node information
     info = get_node_info()
     role = determine_node_role(info["node_rank"], info["prefill_nodes"])
-    
+
     print(f"Cluster Configuration:")
     print(f"  Total Nodes: {info['total_nodes']}")
     print(f"  Prefill Nodes: {info['prefill_nodes']} (ranks 1-{info['prefill_nodes']})")
-    print(f"  Decode Nodes: {info['decode_nodes']} (ranks {info['prefill_nodes']+1}-{info['total_nodes']-1})")
+    print(
+        f"  Decode Nodes: {info['decode_nodes']} (ranks {info['prefill_nodes']+1}-{info['total_nodes']-1})"
+    )
     print(f"  Proxy Node: 1 (rank 0)")
     print(f"\nThis Node:")
     print(f"  Rank: {info['node_rank']}")
     print(f"  Role: {role.upper()}")
     print(f"  Hostname: {info['hostname']}")
     print()
-    
+
     # Simulate based on role
     try:
         if role == "proxy":
@@ -184,22 +188,22 @@ def main():
         else:
             print(f"❌ ERROR: Unknown role '{role}'")
             sys.exit(1)
-        
+
         print("\n" + "=" * 60)
         print("✅ Simulation Complete")
         print("=" * 60)
         return 0
-        
+
     except KeyboardInterrupt:
         print("\n\n⚠️  Interrupted by user")
         return 130
     except Exception as e:
         print(f"\n❌ ERROR: {e}")
         import traceback
+
         traceback.print_exc()
         return 1
 
 
 if __name__ == "__main__":
     sys.exit(main())
-
diff --git a/tests/fixtures/dummy/scripts/dummy_therock/run.sh b/tests/fixtures/dummy/scripts/dummy_therock/run.sh
index 12cafac4..8d52714f 100755
--- a/tests/fixtures/dummy/scripts/dummy_therock/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy_therock/run.sh
@@ -45,4 +45,3 @@ echo ""
 echo "========================================================================"
 echo "Benchmark completed!"
 echo "========================================================================"
-
diff --git a/tests/fixtures/dummy/scripts/dummy_therock/train_resnet.py b/tests/fixtures/dummy/scripts/dummy_therock/train_resnet.py
index c90fe482..25c8ad27 100755
--- a/tests/fixtures/dummy/scripts/dummy_therock/train_resnet.py
+++ b/tests/fixtures/dummy/scripts/dummy_therock/train_resnet.py
@@ -5,11 +5,12 @@
 This script benchmarks ResNet50 training performance using PyTorch
 on TheRock's ROCm distribution.
 """
+import sys
+import time
+
 import torch
 import torch.nn as nn
 import torchvision.models as models
-import time
-import sys
 
 # Configuration
 BATCH_SIZE = 64
@@ -21,80 +22,80 @@ def main():
     print("=" * 70)
     print("ResNet50 Training Benchmark (TheRock)")
     print("=" * 70)
-    
+
     # Setup device
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     print(f"Device: {device}")
-    
+
     if torch.cuda.is_available():
         print(f"GPU: {torch.cuda.get_device_name(0)}")
         print(f"GPU Count: {torch.cuda.device_count()}")
-    
+
     # Create model
     print("\nCreating ResNet50 model...")
     model = models.resnet50(pretrained=False, num_classes=1000).to(device)
     model.train()
-    
+
     # Setup optimizer and loss
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
     criterion = nn.CrossEntropyLoss()
-    
+
     print(f"Batch Size: {BATCH_SIZE}")
     print(f"Iterations: {NUM_ITERATIONS}")
     print(f"Image Size: {IMAGE_SIZE}x{IMAGE_SIZE}")
-    
+
     # Warmup
     print("\nWarming up (10 iterations)...")
     for _ in range(10):
         images = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE, device=device)
         labels = torch.randint(0, 1000, (BATCH_SIZE,), device=device)
-        
+
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
         loss.backward()
         optimizer.step()
-    
+
     if torch.cuda.is_available():
         torch.cuda.synchronize()
-    
+
     # Benchmark
     print(f"Running benchmark ({NUM_ITERATIONS} iterations)...")
     start_time = time.time()
-    
+
     for i in range(NUM_ITERATIONS):
         images = torch.randn(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE, device=device)
         labels = torch.randint(0, 1000, (BATCH_SIZE,), device=device)
-        
+
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
         loss.backward()
         optimizer.step()
-        
+
         if (i + 1) % 20 == 0:
             print(f"  Progress: {i + 1}/{NUM_ITERATIONS}")
-    
+
     if torch.cuda.is_available():
         torch.cuda.synchronize()
-    
+
     end_time = time.time()
-    
+
     # Calculate metrics
     duration = end_time - start_time
     total_images = BATCH_SIZE * NUM_ITERATIONS
     images_per_sec = total_images / duration
-    
+
     print("\n" + "=" * 70)
     print("Benchmark Results:")
     print(f"  Total Images Processed: {total_images}")
     print(f"  Duration: {duration:.2f} seconds")
     print(f"  Throughput: {images_per_sec:.2f} images/sec")
     print("=" * 70)
-    
+
     # madengine performance output (required format)
     print(f"\nperformance: {images_per_sec:.2f} images_per_second")
-    
+
     return 0
 
 
@@ -104,6 +105,6 @@ def main():
     except Exception as e:
         print(f"Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
-
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/helper.py b/tests/fixtures/dummy/scripts/dummy_torchrun/helper.py
index e705ce30..287d4d3f 100644
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/helper.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/helper.py
@@ -15,24 +15,33 @@
 
 class ResidualBlock(nn.Module):
     """Residual block with skip connection"""
+
     def __init__(self, in_channels, out_channels, stride=1):
         super(ResidualBlock, self).__init__()
-        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
-                               stride=stride, padding=1, bias=False)
+        self.conv1 = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
+        )
         self.bn1 = nn.BatchNorm2d(out_channels)
-        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
-                               stride=1, padding=1, bias=False)
+        self.conv2 = nn.Conv2d(
+            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
+        )
         self.bn2 = nn.BatchNorm2d(out_channels)
-        
+
         # Skip connection
         self.skip = nn.Sequential()
         if stride != 1 or in_channels != out_channels:
             self.skip = nn.Sequential(
-                nn.Conv2d(in_channels, out_channels, kernel_size=1, 
-                         stride=stride, bias=False),
-                nn.BatchNorm2d(out_channels)
+                nn.Conv2d(
+                    in_channels, out_channels, kernel_size=1, stride=stride, bias=False
+                ),
+                nn.BatchNorm2d(out_channels),
             )
-    
+
     def forward(self, x):
         out = F.relu(self.bn1(self.conv1(x)))
         out = self.bn2(self.conv2(out))
@@ -44,29 +53,30 @@ def forward(self, x):
 class ResNetModel(nn.Module):
     """
     ResNet-style model for distributed training benchmark.
-    
+
     This is a more realistic model architecture compared to SimpleCNN,
     demonstrating residual connections and deeper networks.
     """
+
     def __init__(self, num_classes=1000, num_blocks=[2, 2, 2, 2]):
         super(ResNetModel, self).__init__()
         self.in_channels = 64
-        
+
         # Initial convolution
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
         self.bn1 = nn.BatchNorm2d(64)
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
+
         # Residual layers
         self.layer1 = self._make_layer(64, num_blocks[0], stride=1)
         self.layer2 = self._make_layer(128, num_blocks[1], stride=2)
         self.layer3 = self._make_layer(256, num_blocks[2], stride=2)
         self.layer4 = self._make_layer(512, num_blocks[3], stride=2)
-        
+
         # Classification head
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(512, num_classes)
-    
+
     def _make_layer(self, out_channels, num_blocks, stride):
         """Create a layer with multiple residual blocks"""
         strides = [stride] + [1] * (num_blocks - 1)
@@ -75,7 +85,7 @@ def _make_layer(self, out_channels, num_blocks, stride):
             layers.append(ResidualBlock(self.in_channels, out_channels, stride))
             self.in_channels = out_channels
         return nn.Sequential(*layers)
-    
+
     def forward(self, x):
         out = self.pool(F.relu(self.bn1(self.conv1(x))))
         out = self.layer1(out)
@@ -91,31 +101,33 @@ def forward(self, x):
 class SyntheticDataset:
     """
     Synthetic dataset generator for benchmarking.
-    
+
     Generates random data on-the-fly to avoid I/O bottlenecks
     and provide consistent benchmarking results.
     """
+
     def __init__(self, num_samples, batch_size, image_size=224, num_classes=1000):
         self.num_samples = num_samples
         self.batch_size = batch_size
         self.image_size = image_size
         self.num_classes = num_classes
         self.num_batches = num_samples // batch_size
-    
+
     def generate_batch(self, device):
         """Generate a synthetic batch of images and labels"""
-        images = torch.randn(self.batch_size, 3, self.image_size, 
-                            self.image_size, device=device)
-        labels = torch.randint(0, self.num_classes, (self.batch_size,), 
-                              device=device)
+        images = torch.randn(
+            self.batch_size, 3, self.image_size, self.image_size, device=device
+        )
+        labels = torch.randint(0, self.num_classes, (self.batch_size,), device=device)
         return images, labels
-    
+
     def __len__(self):
         return self.num_batches
 
 
 class BenchmarkConfig:
     """Configuration for distributed training benchmark"""
+
     def __init__(self):
         # Training hyperparameters
         self.batch_size = 128
@@ -123,16 +135,16 @@ def __init__(self):
         self.learning_rate = 0.01
         self.momentum = 0.9
         self.weight_decay = 1e-4
-        
+
         # Data configuration
         self.image_size = 224
         self.num_classes = 1000
         self.num_batches = 100
-        
+
         # Model configuration
         self.model_type = "resnet"  # or "simple_cnn"
         self.resnet_blocks = [2, 2, 2, 2]  # ResNet-18 style
-    
+
     def __str__(self):
         return (
             f"BenchmarkConfig(\n"
@@ -150,7 +162,7 @@ def print_distributed_info(rank, local_rank, world_size):
     """Print distributed training information"""
     import socket
     import os
-    
+
     print(f"\n[Rank {rank}] Distributed Training Info:")
     print(f"  Hostname: {socket.gethostname()}")
     print(f"  Global Rank: {rank}")
@@ -166,7 +178,9 @@ def print_gpu_info(rank, device):
         print(f"\n[Rank {rank}] GPU Info:")
         print(f"  Device: {device}")
         print(f"  GPU Name: {torch.cuda.get_device_name(device)}")
-        print(f"  GPU Memory: {torch.cuda.get_device_properties(device).total_memory / 1e9:.2f} GB")
+        print(
+            f"  GPU Memory: {torch.cuda.get_device_properties(device).total_memory / 1e9:.2f} GB"
+        )
     else:
         print(f"\n[Rank {rank}] Warning: CUDA not available, using CPU")
 
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run.sh b/tests/fixtures/dummy/scripts/dummy_torchrun/run.sh
index bc0f2318..fdbffe84 100755
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run.sh
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run.sh
@@ -19,10 +19,10 @@ cd "$SCRIPT_DIR"
 if [ -z "$MAD_MULTI_NODE_RUNNER" ]; then
     # Get number of GPUs from environment
     N_GPUS="${MAD_RUNTIME_NGPUS:-1}"
-    
+
     echo "ℹ️  MAD_MULTI_NODE_RUNNER not set, using standalone torchrun"
     echo "ℹ️  Using $N_GPUS GPUs"
-    
+
     MAD_MULTI_NODE_RUNNER="torchrun --standalone --nproc_per_node=$N_GPUS"
 fi
 
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun.py
index 204ae985..69abec8b 100644
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun.py
@@ -13,22 +13,23 @@
 Usage:
   # Single GPU
   torchrun --standalone --nproc_per_node=1 run_torchrun.py
-  
+
   # Multi-GPU (single node)
   torchrun --standalone --nproc_per_node=8 run_torchrun.py
-  
+
   # Multi-node (via K8s with torchrun)
   torchrun --nnodes=4 --nproc_per_node=8 --master_addr=... run_torchrun.py
 """
 
 import os
+import socket
 import sys
 import time
-import socket
+
 import torch
+import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 
 # Configuration
@@ -68,21 +69,22 @@ def print_header():
 
 class SimpleCNN(nn.Module):
     """Simple CNN model for benchmarking"""
+
     def __init__(self, num_classes=1000):
         super(SimpleCNN, self).__init__()
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
         self.bn1 = nn.BatchNorm2d(64)
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
+
         self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
         self.bn2 = nn.BatchNorm2d(128)
-        
+
         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
         self.bn3 = nn.BatchNorm2d(256)
-        
+
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(256, num_classes)
-    
+
     def forward(self, x):
         x = self.pool(F.relu(self.bn1(self.conv1(x))))
         x = self.pool(F.relu(self.bn2(self.conv2(x))))
@@ -106,62 +108,64 @@ def train_epoch(model, optimizer, criterion, epoch, device):
     epoch_start = time.time()
     total_samples = 0
     total_loss = 0.0
-    
+
     for batch_idx in range(NUM_BATCHES):
         batch_start = time.time()
-        
+
         # Generate synthetic data
         images, labels = generate_synthetic_batch(BATCH_SIZE, device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (gradients are automatically synchronized across GPUs)
         loss.backward()
-        
+
         # Update weights
         optimizer.step()
-        
+
         batch_time = time.time() - batch_start
         total_samples += BATCH_SIZE
         total_loss += loss.item()
-        
+
         # Print progress from local rank 0 on each node
         if local_rank == 0 and (batch_idx + 1) % 20 == 0:
             avg_loss = total_loss / (batch_idx + 1)
             throughput = BATCH_SIZE / batch_time  # Local throughput
-            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
-                  f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
-                  f"Loss: {loss.item():.4f} "
-                  f"Throughput: {throughput:.2f} samples/sec (local)")
-    
+            print(
+                f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
+                f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
+                f"Loss: {loss.item():.4f} "
+                f"Throughput: {throughput:.2f} samples/sec (local)"
+            )
+
     epoch_time = time.time() - epoch_start
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # ========================================================================
     # Node-Local Throughput Measurement
     # ========================================================================
     # Calculate throughput for ALL GPUs on THIS NODE
     local_samples = NUM_BATCHES * BATCH_SIZE
     local_gpu_throughput = local_samples / epoch_time
-    
+
     # Get local world size (GPUs per node)
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
-    
+
     # Node throughput = sum of all local GPUs on this node
     # In data parallel, each GPU processes the same throughput
     node_throughput = local_gpu_throughput * local_world_size
-    
+
     # Return metrics dictionary
     metrics = {
-        'avg_loss': avg_loss,
-        'node_throughput': node_throughput,
-        'epoch_time': epoch_time,
-        'local_world_size': local_world_size
+        "avg_loss": avg_loss,
+        "node_throughput": node_throughput,
+        "epoch_time": epoch_time,
+        "local_world_size": local_world_size,
     }
-    
+
     return metrics
 
 
@@ -169,9 +173,9 @@ def main():
     """Main training function"""
     # Start timer for total test duration
     test_start_time = time.time()
-    
+
     print_header()
-    
+
     # Create per-process MIOpen cache directory to avoid database conflicts
     # This must be done AFTER torchrun sets LOCAL_RANK environment variable
     # This prevents "Duplicate ID" errors and database corruption in multi-GPU training
@@ -180,10 +184,12 @@ def main():
         # Cannot use expandvars() because the template uses ${LOCAL_RANK} syntax
         miopen_template = os.environ["MIOPEN_USER_DB_PATH"]
         # Replace ${LOCAL_RANK} or $LOCAL_RANK with actual value
-        miopen_path = miopen_template.replace("${LOCAL_RANK:-0}", str(local_rank)).replace("$LOCAL_RANK", str(local_rank))
+        miopen_path = miopen_template.replace(
+            "${LOCAL_RANK:-0}", str(local_rank)
+        ).replace("$LOCAL_RANK", str(local_rank))
         os.makedirs(miopen_path, exist_ok=True)
         print(f"[Rank {rank}] ✓ Created MIOpen cache directory: {miopen_path}")
-    
+
     # Initialize distributed training
     if world_size > 1:
         print(f"\n[Rank {rank}] Initializing distributed process group...")
@@ -192,37 +198,41 @@ def main():
             backend="nccl",
             init_method=f"env://",  # Use environment variables (set by torchrun)
             world_size=world_size,
-            rank=rank
+            rank=rank,
         )
         print(f"[Rank {rank}] ✓ Process group initialized")
         print(f"[Rank {rank}]   Backend: {dist.get_backend()}")
         print(f"[Rank {rank}]   World Size: {dist.get_world_size()}")
     else:
         print(f"\n=== Running in Standalone Mode (Single GPU) ===")
-    
+
     # Set device
     if torch.cuda.is_available():
         num_gpus = torch.cuda.device_count()
         print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)")
-        print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}")
-        
+        print(
+            f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}"
+        )
+
         if local_rank >= num_gpus:
-            print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}")
+            print(
+                f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}"
+            )
             print(f"[Rank {rank}] Using cuda:0 instead")
             device = torch.device("cuda:0")
         else:
             device = torch.device(f"cuda:{local_rank}")
-        
+
         torch.cuda.set_device(device)
         print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}")
     else:
         device = torch.device("cpu")
         print(f"[Rank {rank}] Warning: CUDA not available, using CPU")
-    
+
     # Create model
     print(f"\n[Rank {rank}] Creating model...")
     model = SimpleCNN(num_classes=NUM_CLASSES).to(device)
-    
+
     # Wrap model with DDP for distributed training
     if world_size > 1:
         # Best practice: Explicitly specify device_ids for DDP
@@ -231,53 +241,57 @@ def main():
             device_ids=[local_rank],
             output_device=local_rank,
             broadcast_buffers=True,  # Ensure buffers (like BatchNorm stats) are synced
-            find_unused_parameters=False  # Set True only if needed (performance impact)
+            find_unused_parameters=False,  # Set True only if needed (performance impact)
         )
         print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel")
-    
+
     # Create optimizer and loss function
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
     criterion = nn.CrossEntropyLoss()
-    
+
     # Synchronize before training
     if world_size > 1:
         # Best practice: Specify device to avoid warnings
         dist.barrier(device_ids=[local_rank])
-    
+
     # Get topology information early (needed for logging)
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
     node_rank = rank // local_world_size if local_world_size > 0 else 0
-    
+
     if local_rank == 0:
         print(f"\n{'='*70}")
         print(f"[Node {node_rank}] Starting Training")
         print(f"{'='*70}")
-    
+
     # Training loop
     all_metrics = []
     for epoch in range(NUM_EPOCHS):
-        metrics = train_epoch(
-            model, optimizer, criterion, epoch, device
-        )
+        metrics = train_epoch(model, optimizer, criterion, epoch, device)
         all_metrics.append(metrics)
-        
+
         if local_rank == 0:
             print(f"\n[Node {node_rank}] Epoch [{epoch+1}/{NUM_EPOCHS}] Complete:")
             print(f"  Average Loss: {metrics['avg_loss']:.4f}")
             print(f"  Node Throughput: {metrics['node_throughput']:.2f} samples/sec")
             print(f"  Local GPUs: {metrics['local_world_size']}")
-    
+
     # Calculate average node throughput across all epochs
-    avg_node_throughput = sum(m['node_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_epoch_time = sum(m['epoch_time'] for m in all_metrics) / len(all_metrics)
-    
+    avg_node_throughput = sum(m["node_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_epoch_time = sum(m["epoch_time"] for m in all_metrics) / len(all_metrics)
+
     # Calculate num_nodes for reference
-    num_nodes = (world_size + local_world_size - 1) // local_world_size if local_world_size > 0 else 1
-    
+    num_nodes = (
+        (world_size + local_world_size - 1) // local_world_size
+        if local_world_size > 0
+        else 1
+    )
+
     # Synchronize before final output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # ========================================================================
     # Node-Local Performance Reporting (NEW - Best Practice)
     # Each node reports its OWN performance
@@ -293,24 +307,23 @@ def main():
         print(f"Node Throughput: {avg_node_throughput:.2f} samples_per_second")
         print(f"Avg Time per Epoch: {avg_epoch_time:.2f}s")
         print(f"{'='*70}")
-        
+
         # CRITICAL: Standard output format for madengine parsing
         print(f"performance: {avg_node_throughput:.2f} samples_per_second", flush=True)
         print(f"node_id: {node_rank}", flush=True)
         print(f"local_gpus: {local_world_size}", flush=True)
-        
+
         # Calculate and print test duration
         test_duration = time.time() - test_start_time
         print(f"test_duration: {test_duration:.2f}s", flush=True)
         sys.stdout.flush()
 
-    
     # Cleanup
     if world_size > 1:
         dist.destroy_process_group()
         if rank == 0:
             print(f"✓ Process group destroyed")
-    
+
     return 0
 
 
@@ -320,5 +333,6 @@ def main():
     except Exception as e:
         print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_minio.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_minio.py
index 26c9c236..8dbcd314 100755
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_minio.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_minio.py
@@ -22,14 +22,15 @@
 """
 
 import os
+import pathlib
+import socket
 import sys
 import time
-import socket
-import pathlib
+
 import torch
+import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 
 # Configuration
@@ -75,23 +76,23 @@ def print_header():
 def validate_data_availability():
     """
     Validate that required data is available (K8s best practice).
-    
+
     Strategy:
     1. Rank 0 checks data first and reports status
     2. All ranks independently validate data (no barrier needed before init_process_group)
     3. Exit gracefully if data missing
-    
+
     Note: For K8s deployments, MAD_DATAHOME points to PVC mount point (/data).
     This ensures data is shared across all pods (single-node and multi-node).
     PVC must be configured with ReadWriteMany for multi-node deployments.
-    
+
     Returns:
         bool: True if data is available, False otherwise
     """
     # K8s best practice: Data stored in PVC at /data (separate from compute pods)
     data_home = os.environ.get("MAD_DATAHOME", "/data")
     data_path = pathlib.Path(data_home) / DATA_FILE
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("Data Provider Validation")
@@ -99,7 +100,7 @@ def validate_data_availability():
         print(f"Data Home: {data_home}")
         print(f"Expected File: {DATA_FILE}")
         print(f"Full Path: {data_path}")
-        
+
         if data_path.exists():
             file_size = data_path.stat().st_size
             file_size_mb = file_size / (1024 * 1024)
@@ -113,39 +114,40 @@ def validate_data_availability():
             print(f"\n⚠️  Data provider should have downloaded this file.")
             print(f"   Check data provider configuration and logs.")
         print(f"{'='*70}\n")
-    
+
     # Note: Cannot use dist.barrier() here - process group not initialized yet
     # Data validation happens before distributed initialization
     # All ranks will independently validate data availability without synchronization
-    
+
     # All ranks independently validate data exists
     data_available = data_path.exists()
-    
+
     if not data_available:
         print(f"[Rank {rank}] ❌ ERROR: Data file not found at {data_path}")
     else:
         print(f"[Rank {rank}] ✅ Data file validated: {data_path}")
-    
+
     return data_available
 
 
 class SimpleCNN(nn.Module):
     """Simple CNN model for benchmarking"""
+
     def __init__(self, num_classes=1000):
         super(SimpleCNN, self).__init__()
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
         self.bn1 = nn.BatchNorm2d(64)
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
+
         self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
         self.bn2 = nn.BatchNorm2d(128)
-        
+
         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
         self.bn3 = nn.BatchNorm2d(256)
-        
+
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(256, num_classes)
-    
+
     def forward(self, x):
         x = self.pool(F.relu(self.bn1(self.conv1(x))))
         x = self.pool(F.relu(self.bn2(self.conv2(x))))
@@ -169,106 +171,114 @@ def train_epoch(model, optimizer, criterion, epoch, device):
     epoch_start = time.time()
     total_samples = 0
     total_loss = 0.0
-    
+
     for batch_idx in range(NUM_BATCHES):
         batch_start = time.time()
-        
+
         # Generate synthetic data
         images, labels = generate_synthetic_batch(BATCH_SIZE, device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (gradients are automatically synchronized across GPUs)
         loss.backward()
-        
+
         # Update weights
         optimizer.step()
-        
+
         batch_time = time.time() - batch_start
         total_samples += BATCH_SIZE
         total_loss += loss.item()
-        
+
         # Print progress from rank 0
         if rank == 0 and (batch_idx + 1) % 20 == 0:
             avg_loss = total_loss / (batch_idx + 1)
             throughput = BATCH_SIZE * world_size / batch_time
-            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
-                  f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
-                  f"Loss: {loss.item():.4f} "
-                  f"Throughput: {throughput:.2f} samples/sec")
-    
+            print(
+                f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
+                f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
+                f"Loss: {loss.item():.4f} "
+                f"Throughput: {throughput:.2f} samples/sec"
+            )
+
     epoch_time = time.time() - epoch_start
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # ========================================================================
     # Accurate Distributed Throughput Measurement (Best Practice)
     # ========================================================================
     # Calculate local throughput for this rank
     local_samples = NUM_BATCHES * BATCH_SIZE
     local_throughput = local_samples / epoch_time
-    
+
     # Aggregate metrics across all ranks using all_reduce
     if world_size > 1:
         # Convert to tensors for all_reduce
         local_throughput_tensor = torch.tensor([local_throughput], device=device)
         epoch_time_tensor = torch.tensor([epoch_time], device=device)
-        
+
         # Sum all local throughputs to get true global throughput
         global_throughput_tensor = local_throughput_tensor.clone()
         dist.all_reduce(global_throughput_tensor, op=dist.ReduceOp.SUM)
-        
+
         # Get max epoch time (slowest node determines overall speed)
         max_epoch_time_tensor = epoch_time_tensor.clone()
         dist.all_reduce(max_epoch_time_tensor, op=dist.ReduceOp.MAX)
-        
+
         # Get min epoch time (fastest node)
         min_epoch_time_tensor = epoch_time_tensor.clone()
         dist.all_reduce(min_epoch_time_tensor, op=dist.ReduceOp.MIN)
-        
+
         global_throughput = global_throughput_tensor.item()
         max_epoch_time = max_epoch_time_tensor.item()
         min_epoch_time = min_epoch_time_tensor.item()
-        
+
         # Calculate load imbalance
-        time_imbalance = ((max_epoch_time - min_epoch_time) / max_epoch_time) * 100 if max_epoch_time > 0 else 0.0
-        
+        time_imbalance = (
+            ((max_epoch_time - min_epoch_time) / max_epoch_time) * 100
+            if max_epoch_time > 0
+            else 0.0
+        )
+
     else:
         # Single GPU
         global_throughput = local_throughput
         max_epoch_time = epoch_time
         min_epoch_time = epoch_time
         time_imbalance = 0.0
-    
+
     # Return metrics dictionary
     metrics = {
-        'avg_loss': avg_loss,
-        'local_throughput': local_throughput,
-        'global_throughput': global_throughput,
-        'epoch_time': epoch_time,
-        'max_epoch_time': max_epoch_time,
-        'min_epoch_time': min_epoch_time,
-        'time_imbalance': time_imbalance
+        "avg_loss": avg_loss,
+        "local_throughput": local_throughput,
+        "global_throughput": global_throughput,
+        "epoch_time": epoch_time,
+        "max_epoch_time": max_epoch_time,
+        "min_epoch_time": min_epoch_time,
+        "time_imbalance": time_imbalance,
     }
-    
+
     return metrics
 
 
 def main():
     """Main training function"""
     print_header()
-    
+
     # Create per-process MIOpen cache directory to avoid database conflicts
     # This must be done AFTER torchrun sets LOCAL_RANK environment variable
     if "MIOPEN_USER_DB_PATH" in os.environ:
         # Construct the per-process MIOpen path using actual local_rank value
         miopen_template = os.environ["MIOPEN_USER_DB_PATH"]
-        miopen_path = miopen_template.replace("${LOCAL_RANK:-0}", str(local_rank)).replace("$LOCAL_RANK", str(local_rank))
+        miopen_path = miopen_template.replace(
+            "${LOCAL_RANK:-0}", str(local_rank)
+        ).replace("$LOCAL_RANK", str(local_rank))
         os.makedirs(miopen_path, exist_ok=True)
         print(f"[Rank {rank}] ✓ Created MIOpen cache directory: {miopen_path}")
-    
+
     # ========================================================================
     # K8s Best Practice: Validate Data Before Initializing Training
     # ========================================================================
@@ -276,10 +286,10 @@ def main():
         print(f"\n{'='*70}")
         print("Step 1: Data Provider Validation")
         print(f"{'='*70}")
-    
+
     # Validate data availability (all ranks)
     data_available = validate_data_availability()
-    
+
     if not data_available:
         # Exit gracefully if data is not available
         if rank == 0:
@@ -288,10 +298,10 @@ def main():
             print(f"{'='*70}")
             print("Exiting...")
         sys.exit(1)
-    
+
     if rank == 0:
         print(f"\n✅ Data validation complete - proceeding with training\n")
-    
+
     # ========================================================================
     # Initialize Distributed Training
     # ========================================================================
@@ -300,44 +310,48 @@ def main():
             print(f"{'='*70}")
             print("Step 2: Initialize Distributed Training")
             print(f"{'='*70}")
-        
+
         print(f"\n[Rank {rank}] Initializing distributed process group...")
         # Best practice: Specify device_ids to avoid PyTorch warnings
         dist.init_process_group(
             backend="nccl",
             init_method=f"env://",  # Use environment variables (set by torchrun)
             world_size=world_size,
-            rank=rank
+            rank=rank,
         )
         print(f"[Rank {rank}] ✓ Process group initialized")
         print(f"[Rank {rank}]   Backend: {dist.get_backend()}")
         print(f"[Rank {rank}]   World Size: {dist.get_world_size()}")
     else:
         print(f"\n=== Running in Standalone Mode (Single GPU) ===")
-    
+
     # Set device
     if torch.cuda.is_available():
         num_gpus = torch.cuda.device_count()
         print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)")
-        print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}")
-        
+        print(
+            f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}"
+        )
+
         if local_rank >= num_gpus:
-            print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}")
+            print(
+                f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}"
+            )
             print(f"[Rank {rank}] Using cuda:0 instead")
             device = torch.device("cuda:0")
         else:
             device = torch.device(f"cuda:{local_rank}")
-        
+
         torch.cuda.set_device(device)
         print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}")
     else:
         device = torch.device("cpu")
         print(f"[Rank {rank}] Warning: CUDA not available, using CPU")
-    
+
     # Create model
     print(f"\n[Rank {rank}] Creating model...")
     model = SimpleCNN(num_classes=NUM_CLASSES).to(device)
-    
+
     # Wrap model with DDP for distributed training
     if world_size > 1:
         # Best practice: Explicitly specify device_ids for DDP
@@ -346,89 +360,105 @@ def main():
             device_ids=[local_rank],
             output_device=local_rank,
             broadcast_buffers=True,  # Ensure buffers (like BatchNorm stats) are synced
-            find_unused_parameters=False  # Set True only if needed (performance impact)
+            find_unused_parameters=False,  # Set True only if needed (performance impact)
         )
         print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel")
-    
+
     # Create optimizer and loss function
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
     criterion = nn.CrossEntropyLoss()
-    
+
     # Synchronize before training
     if world_size > 1:
         # Best practice: Specify device to avoid warnings
         dist.barrier(device_ids=[local_rank])
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("Starting Training")
         print(f"{'='*70}")
-    
+
     # Training loop
     all_metrics = []
     for epoch in range(NUM_EPOCHS):
-        metrics = train_epoch(
-            model, optimizer, criterion, epoch, device
-        )
+        metrics = train_epoch(model, optimizer, criterion, epoch, device)
         all_metrics.append(metrics)
-        
+
         if rank == 0:
             print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}] Complete:")
             print(f"  Average Loss: {metrics['avg_loss']:.4f}")
-            print(f"  Global Throughput: {metrics['global_throughput']:.2f} samples/sec")
+            print(
+                f"  Global Throughput: {metrics['global_throughput']:.2f} samples/sec"
+            )
             print(f"  Images/sec: {metrics['global_throughput']:.2f}")
-            
+
             # Show load imbalance warning if significant
-            if metrics['time_imbalance'] > 5.0:
+            if metrics["time_imbalance"] > 5.0:
                 print(f"  ⚠️  Load Imbalance: {metrics['time_imbalance']:.1f}%")
-    
+
     # Calculate average metrics across all epochs
-    avg_global_throughput = sum(m['global_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_local_throughput = sum(m['local_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_time_imbalance = sum(m['time_imbalance'] for m in all_metrics) / len(all_metrics)
-    
+    avg_global_throughput = sum(m["global_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_local_throughput = sum(m["local_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_time_imbalance = sum(m["time_imbalance"] for m in all_metrics) / len(
+        all_metrics
+    )
+
     # Get topology information
     nproc_per_node = int(os.environ.get("LOCAL_WORLD_SIZE", world_size))
-    num_nodes = (world_size + nproc_per_node - 1) // nproc_per_node if nproc_per_node > 0 else 1
+    num_nodes = (
+        (world_size + nproc_per_node - 1) // nproc_per_node if nproc_per_node > 0 else 1
+    )
     node_rank = rank // nproc_per_node if nproc_per_node > 0 else 0
-    
+
     # Synchronize before final output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # Each node's rank 0 reports local performance
     if local_rank == 0:
         print(f"\n[Node {node_rank}] Local Performance Summary:")
-        print(f"  Node Throughput: {avg_local_throughput * nproc_per_node:.2f} samples/sec")
+        print(
+            f"  Node Throughput: {avg_local_throughput * nproc_per_node:.2f} samples/sec"
+        )
         print(f"  GPUs on Node: {nproc_per_node}")
         print(f"  Avg Time per Epoch: {all_metrics[-1]['epoch_time']:.2f}s")
-    
+
     # Synchronize again before global rank 0 output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # Global rank 0 reports aggregated performance
     if rank == 0:
         print(f"\n{'='*70}")
         print("Training Complete - GLOBAL METRICS")
         print(f"{'='*70}")
-        print(f"Topology: {num_nodes} nodes × {nproc_per_node} GPUs/node = {world_size} total GPUs")
+        print(
+            f"Topology: {num_nodes} nodes × {nproc_per_node} GPUs/node = {world_size} total GPUs"
+        )
         print(f"Global Throughput: {avg_global_throughput:.2f} samples/sec")
         print(f"Per-GPU Throughput: {avg_global_throughput/world_size:.2f} samples/sec")
         print(f"Global Batch Size: {BATCH_SIZE * world_size}")
-        
+
         # Calculate scaling efficiency
         # Ideal throughput = single GPU throughput * number of GPUs
         ideal_single_gpu_throughput = avg_global_throughput / world_size
         ideal_throughput = ideal_single_gpu_throughput * world_size
-        scaling_efficiency = (avg_global_throughput / ideal_throughput) * 100 if ideal_throughput > 0 else 100.0
+        scaling_efficiency = (
+            (avg_global_throughput / ideal_throughput) * 100
+            if ideal_throughput > 0
+            else 100.0
+        )
         print(f"Scaling Efficiency: {scaling_efficiency:.1f}%")
-        
+
         if avg_time_imbalance > 5.0:
             print(f"Average Load Imbalance: {avg_time_imbalance:.1f}%")
-        
+
         print(f"{'='*70}")
-        
+
         # Save results with topology information
         with open("training_results.txt", "w") as f:
             f.write(f"Training Results with Data Provider\n")
@@ -441,21 +471,23 @@ def main():
             f.write(f"Epochs: {NUM_EPOCHS}\n")
             f.write(f"Global Throughput: {avg_global_throughput:.2f} samples/sec\n")
             f.write(f"Scaling Efficiency: {scaling_efficiency:.1f}%\n")
-        
+
         # Output performance metric for madengine (REQUIRED FORMAT)
         # Use GLOBAL throughput (sum of all nodes - accurate measurement)
         print(f"\nperformance: {avg_global_throughput:.2f} samples_per_second")
-        
+
         # Output topology metadata for parsing
-        print(f"topology: {num_nodes} nodes {nproc_per_node} gpus_per_node {world_size} total_gpus")
+        print(
+            f"topology: {num_nodes} nodes {nproc_per_node} gpus_per_node {world_size} total_gpus"
+        )
         print(f"scaling_efficiency: {scaling_efficiency:.2f}")
-    
+
     # Cleanup
     if world_size > 1:
         dist.destroy_process_group()
         if rank == 0:
             print(f"✓ Process group destroyed")
-    
+
     return 0
 
 
@@ -465,6 +497,6 @@ def main():
     except Exception as e:
         print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
-
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_nas.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_nas.py
index 5599981f..9bb44d14 100755
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_nas.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_data_nas.py
@@ -22,14 +22,15 @@
 """
 
 import os
+import pathlib
+import socket
 import sys
 import time
-import socket
-import pathlib
+
 import torch
+import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 
 # Configuration
@@ -76,37 +77,37 @@ def print_header():
 def validate_data_availability():
     """
     Validate that required data is available from NAS (K8s best practice).
-    
+
     Strategy:
     1. Rank 0 checks data first and reports status
     2. All ranks independently validate data (no barrier needed before init_process_group)
     3. Exit gracefully if data missing
-    
+
     Note: For K8s deployments, MAD_DATAHOME points to PVC mount point (/data).
     This ensures data is shared across all pods (single-node and multi-node).
     PVC must be configured with ReadWriteMany for multi-node deployments.
-    
+
     NAS can be either:
     - Mounted filesystem (traditional NAS)
     - Downloaded data to directory (K8s with data provider)
-    
+
     Similar to run_data_nas.sh: We just verify the data home directory exists and
     optionally has content. No specific file is required - we use synthetic data for
     training benchmarks.
-    
+
     Returns:
         bool: True if data is available, False otherwise
     """
     # K8s best practice: Data stored in PVC at /data (separate from compute pods)
     data_home = os.environ.get("MAD_DATAHOME", "/data")
     data_home_path = pathlib.Path(data_home)
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("NAS Data Provider Validation")
         print(f"{'='*70}")
         print(f"Data Home: {data_home}")
-        
+
         # Check if data directory exists
         if not data_home_path.exists():
             print(f"❌ Data home directory NOT found!")
@@ -114,7 +115,7 @@ def validate_data_availability():
             print(f"   MAD_DATAHOME must be set and directory must exist")
         else:
             print(f"✅ Data home directory exists: {data_home}")
-            
+
             # Check if directory has content (similar to run_data_nas.sh)
             try:
                 dir_contents = list(data_home_path.iterdir())
@@ -138,42 +139,43 @@ def validate_data_availability():
             except PermissionError:
                 print(f"⚠️  Cannot read directory contents (permission denied)")
                 print(f"   Directory exists but contents not accessible")
-        
+
         print(f"{'='*70}\n")
-    
+
     # Note: Cannot use dist.barrier() here - process group not initialized yet
     # Data validation happens before distributed initialization
     # All ranks will independently validate data availability without synchronization
-    
+
     # All ranks independently validate data home exists
     # We don't require a specific file - just that the directory exists
     data_available = data_home_path.exists()
-    
+
     if not data_available:
         print(f"[Rank {rank}] ❌ ERROR: Data home not found at {data_home}")
     else:
         print(f"[Rank {rank}] ✅ Data home validated: {data_home}")
-    
+
     return data_available
 
 
 class SimpleCNN(nn.Module):
     """Simple CNN model for benchmarking"""
+
     def __init__(self, num_classes=1000):
         super(SimpleCNN, self).__init__()
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
         self.bn1 = nn.BatchNorm2d(64)
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
+
         self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
         self.bn2 = nn.BatchNorm2d(128)
-        
+
         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
         self.bn3 = nn.BatchNorm2d(256)
-        
+
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(256, num_classes)
-    
+
     def forward(self, x):
         x = self.pool(F.relu(self.bn1(self.conv1(x))))
         x = self.pool(F.relu(self.bn2(self.conv2(x))))
@@ -197,106 +199,114 @@ def train_epoch(model, optimizer, criterion, epoch, device):
     epoch_start = time.time()
     total_samples = 0
     total_loss = 0.0
-    
+
     for batch_idx in range(NUM_BATCHES):
         batch_start = time.time()
-        
+
         # Generate synthetic data
         images, labels = generate_synthetic_batch(BATCH_SIZE, device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (gradients are automatically synchronized across GPUs)
         loss.backward()
-        
+
         # Update weights
         optimizer.step()
-        
+
         batch_time = time.time() - batch_start
         total_samples += BATCH_SIZE
         total_loss += loss.item()
-        
+
         # Print progress from rank 0
         if rank == 0 and (batch_idx + 1) % 20 == 0:
             avg_loss = total_loss / (batch_idx + 1)
             throughput = BATCH_SIZE * world_size / batch_time
-            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
-                  f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
-                  f"Loss: {loss.item():.4f} "
-                  f"Throughput: {throughput:.2f} samples/sec")
-    
+            print(
+                f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
+                f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
+                f"Loss: {loss.item():.4f} "
+                f"Throughput: {throughput:.2f} samples/sec"
+            )
+
     epoch_time = time.time() - epoch_start
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # ========================================================================
     # Accurate Distributed Throughput Measurement (Best Practice)
     # ========================================================================
     # Calculate local throughput for this rank
     local_samples = NUM_BATCHES * BATCH_SIZE
     local_throughput = local_samples / epoch_time
-    
+
     # Aggregate metrics across all ranks using all_reduce
     if world_size > 1:
         # Convert to tensors for all_reduce
         local_throughput_tensor = torch.tensor([local_throughput], device=device)
         epoch_time_tensor = torch.tensor([epoch_time], device=device)
-        
+
         # Sum all local throughputs to get true global throughput
         global_throughput_tensor = local_throughput_tensor.clone()
         dist.all_reduce(global_throughput_tensor, op=dist.ReduceOp.SUM)
-        
+
         # Get max epoch time (slowest node determines overall speed)
         max_epoch_time_tensor = epoch_time_tensor.clone()
         dist.all_reduce(max_epoch_time_tensor, op=dist.ReduceOp.MAX)
-        
+
         # Get min epoch time (fastest node)
         min_epoch_time_tensor = epoch_time_tensor.clone()
         dist.all_reduce(min_epoch_time_tensor, op=dist.ReduceOp.MIN)
-        
+
         global_throughput = global_throughput_tensor.item()
         max_epoch_time = max_epoch_time_tensor.item()
         min_epoch_time = min_epoch_time_tensor.item()
-        
+
         # Calculate load imbalance
-        time_imbalance = ((max_epoch_time - min_epoch_time) / max_epoch_time) * 100 if max_epoch_time > 0 else 0.0
-        
+        time_imbalance = (
+            ((max_epoch_time - min_epoch_time) / max_epoch_time) * 100
+            if max_epoch_time > 0
+            else 0.0
+        )
+
     else:
         # Single GPU
         global_throughput = local_throughput
         max_epoch_time = epoch_time
         min_epoch_time = epoch_time
         time_imbalance = 0.0
-    
+
     # Return metrics dictionary
     metrics = {
-        'avg_loss': avg_loss,
-        'local_throughput': local_throughput,
-        'global_throughput': global_throughput,
-        'epoch_time': epoch_time,
-        'max_epoch_time': max_epoch_time,
-        'min_epoch_time': min_epoch_time,
-        'time_imbalance': time_imbalance
+        "avg_loss": avg_loss,
+        "local_throughput": local_throughput,
+        "global_throughput": global_throughput,
+        "epoch_time": epoch_time,
+        "max_epoch_time": max_epoch_time,
+        "min_epoch_time": min_epoch_time,
+        "time_imbalance": time_imbalance,
     }
-    
+
     return metrics
 
 
 def main():
     """Main training function"""
     print_header()
-    
+
     # Create per-process MIOpen cache directory to avoid database conflicts
     # This must be done AFTER torchrun sets LOCAL_RANK environment variable
     if "MIOPEN_USER_DB_PATH" in os.environ:
         # Construct the per-process MIOpen path using actual local_rank value
         miopen_template = os.environ["MIOPEN_USER_DB_PATH"]
-        miopen_path = miopen_template.replace("${LOCAL_RANK:-0}", str(local_rank)).replace("$LOCAL_RANK", str(local_rank))
+        miopen_path = miopen_template.replace(
+            "${LOCAL_RANK:-0}", str(local_rank)
+        ).replace("$LOCAL_RANK", str(local_rank))
         os.makedirs(miopen_path, exist_ok=True)
         print(f"[Rank {rank}] ✓ Created MIOpen cache directory: {miopen_path}")
-    
+
     # ========================================================================
     # K8s Best Practice: Validate Data Before Initializing Training
     # ========================================================================
@@ -304,10 +314,10 @@ def main():
         print(f"\n{'='*70}")
         print("Step 1: NAS Data Provider Validation")
         print(f"{'='*70}")
-    
+
     # Validate data availability (all ranks)
     data_available = validate_data_availability()
-    
+
     if not data_available:
         # Exit gracefully if data is not available
         if rank == 0:
@@ -316,10 +326,10 @@ def main():
             print(f"{'='*70}")
             print("Exiting...")
         sys.exit(1)
-    
+
     if rank == 0:
         print(f"\n✅ Data validation complete - proceeding with training\n")
-    
+
     # ========================================================================
     # Initialize Distributed Training
     # ========================================================================
@@ -328,44 +338,48 @@ def main():
             print(f"{'='*70}")
             print("Step 2: Initialize Distributed Training")
             print(f"{'='*70}")
-        
+
         print(f"\n[Rank {rank}] Initializing distributed process group...")
         # Best practice: Specify device_ids to avoid PyTorch warnings
         dist.init_process_group(
             backend="nccl",
             init_method=f"env://",  # Use environment variables (set by torchrun)
             world_size=world_size,
-            rank=rank
+            rank=rank,
         )
         print(f"[Rank {rank}] ✓ Process group initialized")
         print(f"[Rank {rank}]   Backend: {dist.get_backend()}")
         print(f"[Rank {rank}]   World Size: {dist.get_world_size()}")
     else:
         print(f"\n=== Running in Standalone Mode (Single GPU) ===")
-    
+
     # Set device
     if torch.cuda.is_available():
         num_gpus = torch.cuda.device_count()
         print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)")
-        print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}")
-        
+        print(
+            f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}"
+        )
+
         if local_rank >= num_gpus:
-            print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}")
+            print(
+                f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}"
+            )
             print(f"[Rank {rank}] Using cuda:0 instead")
             device = torch.device("cuda:0")
         else:
             device = torch.device(f"cuda:{local_rank}")
-        
+
         torch.cuda.set_device(device)
         print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}")
     else:
         device = torch.device("cpu")
         print(f"[Rank {rank}] Warning: CUDA not available, using CPU")
-    
+
     # Create model
     print(f"\n[Rank {rank}] Creating model...")
     model = SimpleCNN(num_classes=NUM_CLASSES).to(device)
-    
+
     # Wrap model with DDP for distributed training
     if world_size > 1:
         # Best practice: Explicitly specify device_ids for DDP
@@ -374,89 +388,105 @@ def main():
             device_ids=[local_rank],
             output_device=local_rank,
             broadcast_buffers=True,  # Ensure buffers (like BatchNorm stats) are synced
-            find_unused_parameters=False  # Set True only if needed (performance impact)
+            find_unused_parameters=False,  # Set True only if needed (performance impact)
         )
         print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel")
-    
+
     # Create optimizer and loss function
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
     criterion = nn.CrossEntropyLoss()
-    
+
     # Synchronize before training
     if world_size > 1:
         # Best practice: Specify device to avoid warnings
         dist.barrier(device_ids=[local_rank])
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("Starting Training")
         print(f"{'='*70}")
-    
+
     # Training loop
     all_metrics = []
     for epoch in range(NUM_EPOCHS):
-        metrics = train_epoch(
-            model, optimizer, criterion, epoch, device
-        )
+        metrics = train_epoch(model, optimizer, criterion, epoch, device)
         all_metrics.append(metrics)
-        
+
         if rank == 0:
             print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}] Complete:")
             print(f"  Average Loss: {metrics['avg_loss']:.4f}")
-            print(f"  Global Throughput: {metrics['global_throughput']:.2f} samples/sec")
+            print(
+                f"  Global Throughput: {metrics['global_throughput']:.2f} samples/sec"
+            )
             print(f"  Images/sec: {metrics['global_throughput']:.2f}")
-            
+
             # Show load imbalance warning if significant
-            if metrics['time_imbalance'] > 5.0:
+            if metrics["time_imbalance"] > 5.0:
                 print(f"  ⚠️  Load Imbalance: {metrics['time_imbalance']:.1f}%")
-    
+
     # Calculate average metrics across all epochs
-    avg_global_throughput = sum(m['global_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_local_throughput = sum(m['local_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_time_imbalance = sum(m['time_imbalance'] for m in all_metrics) / len(all_metrics)
-    
+    avg_global_throughput = sum(m["global_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_local_throughput = sum(m["local_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_time_imbalance = sum(m["time_imbalance"] for m in all_metrics) / len(
+        all_metrics
+    )
+
     # Get topology information
     nproc_per_node = int(os.environ.get("LOCAL_WORLD_SIZE", world_size))
-    num_nodes = (world_size + nproc_per_node - 1) // nproc_per_node if nproc_per_node > 0 else 1
+    num_nodes = (
+        (world_size + nproc_per_node - 1) // nproc_per_node if nproc_per_node > 0 else 1
+    )
     node_rank = rank // nproc_per_node if nproc_per_node > 0 else 0
-    
+
     # Synchronize before final output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # Each node's rank 0 reports local performance
     if local_rank == 0:
         print(f"\n[Node {node_rank}] Local Performance Summary:")
-        print(f"  Node Throughput: {avg_local_throughput * nproc_per_node:.2f} samples/sec")
+        print(
+            f"  Node Throughput: {avg_local_throughput * nproc_per_node:.2f} samples/sec"
+        )
         print(f"  GPUs on Node: {nproc_per_node}")
         print(f"  Avg Time per Epoch: {all_metrics[-1]['epoch_time']:.2f}s")
-    
+
     # Synchronize again before global rank 0 output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # Global rank 0 reports aggregated performance
     if rank == 0:
         print(f"\n{'='*70}")
         print("Training Complete - GLOBAL METRICS")
         print(f"{'='*70}")
-        print(f"Topology: {num_nodes} nodes × {nproc_per_node} GPUs/node = {world_size} total GPUs")
+        print(
+            f"Topology: {num_nodes} nodes × {nproc_per_node} GPUs/node = {world_size} total GPUs"
+        )
         print(f"Global Throughput: {avg_global_throughput:.2f} samples/sec")
         print(f"Per-GPU Throughput: {avg_global_throughput/world_size:.2f} samples/sec")
         print(f"Global Batch Size: {BATCH_SIZE * world_size}")
-        
+
         # Calculate scaling efficiency
         # Ideal throughput = single GPU throughput * number of GPUs
         ideal_single_gpu_throughput = avg_global_throughput / world_size
         ideal_throughput = ideal_single_gpu_throughput * world_size
-        scaling_efficiency = (avg_global_throughput / ideal_throughput) * 100 if ideal_throughput > 0 else 100.0
+        scaling_efficiency = (
+            (avg_global_throughput / ideal_throughput) * 100
+            if ideal_throughput > 0
+            else 100.0
+        )
         print(f"Scaling Efficiency: {scaling_efficiency:.1f}%")
-        
+
         if avg_time_imbalance > 5.0:
             print(f"Average Load Imbalance: {avg_time_imbalance:.1f}%")
-        
+
         print(f"{'='*70}")
-        
+
         # Save results with topology information
         data_home = os.environ.get("MAD_DATAHOME", "/data")
         with open("training_results.txt", "w") as f:
@@ -470,21 +500,23 @@ def main():
             f.write(f"Epochs: {NUM_EPOCHS}\n")
             f.write(f"Global Throughput: {avg_global_throughput:.2f} samples/sec\n")
             f.write(f"Scaling Efficiency: {scaling_efficiency:.1f}%\n")
-        
+
         # Output performance metric for madengine (REQUIRED FORMAT)
         # Use GLOBAL throughput (sum of all nodes - accurate measurement)
         print(f"\nperformance: {avg_global_throughput:.2f} samples_per_second")
-        
+
         # Output topology metadata for parsing
-        print(f"topology: {num_nodes} nodes {nproc_per_node} gpus_per_node {world_size} total_gpus")
+        print(
+            f"topology: {num_nodes} nodes {nproc_per_node} gpus_per_node {world_size} total_gpus"
+        )
         print(f"scaling_efficiency: {scaling_efficiency:.2f}")
-    
+
     # Cleanup
     if world_size > 1:
         dist.destroy_process_group()
         if rank == 0:
             print(f"✓ Process group destroyed")
-    
+
     return 0
 
 
@@ -494,6 +526,6 @@ def main():
     except Exception as e:
         print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
-
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py
index dc09b9cf..2ad1e0c9 100644
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_torchrun_multi.py
@@ -11,15 +11,16 @@
   torchrun --standalone --nproc_per_node=8 run_torchrun_multi.py
 """
 
+import csv
 import os
+import socket
 import sys
 import time
-import socket
-import csv
+
 import torch
+import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
 
 # Configuration
@@ -60,21 +61,22 @@ def print_header():
 
 class SimpleCNN(nn.Module):
     """Simple CNN model for benchmarking"""
+
     def __init__(self, num_classes=1000):
         super(SimpleCNN, self).__init__()
         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
         self.bn1 = nn.BatchNorm2d(64)
         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        
+
         self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
         self.bn2 = nn.BatchNorm2d(128)
-        
+
         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
         self.bn3 = nn.BatchNorm2d(256)
-        
+
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.fc = nn.Linear(256, num_classes)
-    
+
     def forward(self, x):
         x = self.pool(F.relu(self.bn1(self.conv1(x))))
         x = self.pool(F.relu(self.bn2(self.conv2(x))))
@@ -98,40 +100,42 @@ def train_epoch(model, optimizer, criterion, epoch, device):
     epoch_start = time.time()
     total_samples = 0
     total_loss = 0.0
-    
+
     for batch_idx in range(NUM_BATCHES):
         batch_start = time.time()
-        
+
         # Generate synthetic data
         images, labels = generate_synthetic_batch(BATCH_SIZE, device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (gradients are automatically synchronized across GPUs)
         loss.backward()
-        
+
         # Update weights
         optimizer.step()
-        
+
         batch_time = time.time() - batch_start
         total_samples += BATCH_SIZE
         total_loss += loss.item()
-        
+
         # Print progress from local rank 0 on each node
         if local_rank == 0 and (batch_idx + 1) % 20 == 0:
             avg_loss = total_loss / (batch_idx + 1)
             throughput = BATCH_SIZE / batch_time  # Local throughput
-            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
-                  f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
-                  f"Loss: {loss.item():.4f} "
-                  f"Throughput: {throughput:.2f} samples/sec (local)")
-    
+            print(
+                f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
+                f"Batch [{batch_idx+1}/{NUM_BATCHES}] "
+                f"Loss: {loss.item():.4f} "
+                f"Throughput: {throughput:.2f} samples/sec (local)"
+            )
+
     epoch_time = time.time() - epoch_start
     avg_loss = total_loss / NUM_BATCHES
-    
+
     # ========================================================================
     # Node-Local Throughput Measurement
     # ========================================================================
@@ -139,112 +143,115 @@ def train_epoch(model, optimizer, criterion, epoch, device):
     local_gpu_throughput = local_samples / epoch_time
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
     node_throughput = local_gpu_throughput * local_world_size
-    
+
     metrics = {
-        'avg_loss': avg_loss,
-        'node_throughput': node_throughput,
-        'epoch_time': epoch_time,
-        'local_world_size': local_world_size
+        "avg_loss": avg_loss,
+        "node_throughput": node_throughput,
+        "epoch_time": epoch_time,
+        "local_world_size": local_world_size,
     }
-    
+
     return metrics
 
 
 def main():
     """Main training function"""
     test_start_time = time.time()
-    
+
     print_header()
-    
+
     # Create per-process MIOpen cache directory to avoid database conflicts
     if "MIOPEN_USER_DB_PATH" in os.environ:
         miopen_template = os.environ["MIOPEN_USER_DB_PATH"]
-        miopen_path = miopen_template.replace("${LOCAL_RANK:-0}", str(local_rank)).replace("$LOCAL_RANK", str(local_rank))
+        miopen_path = miopen_template.replace(
+            "${LOCAL_RANK:-0}", str(local_rank)
+        ).replace("$LOCAL_RANK", str(local_rank))
         os.makedirs(miopen_path, exist_ok=True)
         print(f"[Rank {rank}] ✓ Created MIOpen cache directory: {miopen_path}")
-    
+
     # Initialize distributed training
     if world_size > 1:
         print(f"\n[Rank {rank}] Initializing distributed process group...")
         dist.init_process_group(
-            backend="nccl",
-            init_method=f"env://",
-            world_size=world_size,
-            rank=rank
+            backend="nccl", init_method=f"env://", world_size=world_size, rank=rank
         )
         print(f"[Rank {rank}] ✓ Process group initialized")
         print(f"[Rank {rank}]   Backend: {dist.get_backend()}")
         print(f"[Rank {rank}]   World Size: {dist.get_world_size()}")
     else:
         print(f"\n=== Running in Standalone Mode (Single GPU) ===")
-    
+
     # Set device
     if torch.cuda.is_available():
         num_gpus = torch.cuda.device_count()
         print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)")
-        print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}")
-        
+        print(
+            f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}"
+        )
+
         if local_rank >= num_gpus:
-            print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}")
+            print(
+                f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}"
+            )
             print(f"[Rank {rank}] Using cuda:0 instead")
             device = torch.device("cuda:0")
         else:
             device = torch.device(f"cuda:{local_rank}")
-        
+
         torch.cuda.set_device(device)
         print(f"[Rank {rank}] Using GPU: {torch.cuda.get_device_name(device)}")
     else:
         device = torch.device("cpu")
         print(f"[Rank {rank}] Warning: CUDA not available, using CPU")
-    
+
     # Create model
     print(f"\n[Rank {rank}] Creating model...")
     model = SimpleCNN(num_classes=NUM_CLASSES).to(device)
-    
+
     if world_size > 1:
         model = DDP(
             model,
             device_ids=[local_rank],
             output_device=local_rank,
             broadcast_buffers=True,
-            find_unused_parameters=False
+            find_unused_parameters=False,
         )
         print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel")
-    
+
     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
     criterion = nn.CrossEntropyLoss()
-    
+
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     local_world_size = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
     node_rank = rank // local_world_size if local_world_size > 0 else 0
-    
+
     if local_rank == 0:
         print(f"\n{'='*70}")
         print(f"[Node {node_rank}] Starting Training")
         print(f"{'='*70}")
-    
+
     # Training loop
     all_metrics = []
     for epoch in range(NUM_EPOCHS):
-        metrics = train_epoch(
-            model, optimizer, criterion, epoch, device
-        )
+        metrics = train_epoch(model, optimizer, criterion, epoch, device)
         all_metrics.append(metrics)
-        
+
         if local_rank == 0:
             print(f"\n[Node {node_rank}] Epoch [{epoch+1}/{NUM_EPOCHS}] Complete:")
             print(f"  Average Loss: {metrics['avg_loss']:.4f}")
             print(f"  Node Throughput: {metrics['node_throughput']:.2f} samples/sec")
             print(f"  Local GPUs: {metrics['local_world_size']}")
-    
-    avg_node_throughput = sum(m['node_throughput'] for m in all_metrics) / len(all_metrics)
-    avg_epoch_time = sum(m['epoch_time'] for m in all_metrics) / len(all_metrics)
-    
+
+    avg_node_throughput = sum(m["node_throughput"] for m in all_metrics) / len(
+        all_metrics
+    )
+    avg_epoch_time = sum(m["epoch_time"] for m in all_metrics) / len(all_metrics)
+
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     # Node-Local Performance Reporting + multiple_results CSV
     if local_rank == 0:
         print(f"\n{'='*70}")
@@ -256,11 +263,11 @@ def main():
         print(f"Node Throughput: {avg_node_throughput:.2f} samples_per_second")
         print(f"Avg Time per Epoch: {avg_epoch_time:.2f}s")
         print(f"{'='*70}")
-        
+
         print(f"performance: {avg_node_throughput:.2f} samples_per_second", flush=True)
         print(f"node_id: {node_rank}", flush=True)
         print(f"local_gpus: {local_world_size}", flush=True)
-        
+
         test_duration = time.time() - test_start_time
         print(f"test_duration: {test_duration:.2f}s", flush=True)
         sys.stdout.flush()
@@ -268,25 +275,29 @@ def main():
         # Write multiple_results CSV (model,temperature,performance,metric,test_duration for duration in reports)
         with open(MULTI_RESULTS_CSV, "w", newline="") as f:
             writer = csv.writer(f)
-            writer.writerow(["model", "temperature", "performance", "metric", "test_duration"])
+            writer.writerow(
+                ["model", "temperature", "performance", "metric", "test_duration"]
+            )
             test_dur_str = f"{test_duration:.2f}s"
             for i in range(4):
                 # Vary temperature for multiple rows; use throughput for performance
-                writer.writerow([
-                    i + 1,
-                    20 + i * 5,
-                    f"{avg_node_throughput:.2f}",
-                    "samples_per_sec",
-                    test_dur_str,
-                ])
+                writer.writerow(
+                    [
+                        i + 1,
+                        20 + i * 5,
+                        f"{avg_node_throughput:.2f}",
+                        "samples_per_sec",
+                        test_dur_str,
+                    ]
+                )
         print(f"Wrote {MULTI_RESULTS_CSV} for multiple_results collection", flush=True)
-    
+
     # Cleanup
     if world_size > 1:
         dist.destroy_process_group()
         if rank == 0:
             print(f"✓ Process group destroyed")
-    
+
     return 0
 
 
@@ -296,5 +307,6 @@ def main():
     except Exception as e:
         print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_torchrun/run_with_helper.py b/tests/fixtures/dummy/scripts/dummy_torchrun/run_with_helper.py
index 68329eb5..c86142d2 100644
--- a/tests/fixtures/dummy/scripts/dummy_torchrun/run_with_helper.py
+++ b/tests/fixtures/dummy/scripts/dummy_torchrun/run_with_helper.py
@@ -25,7 +25,7 @@
     BenchmarkConfig,
     print_distributed_info,
     print_gpu_info,
-    calculate_model_size
+    calculate_model_size,
 )
 
 # Get distributed environment variables (set by torchrun)
@@ -51,40 +51,44 @@ def train_epoch(model, dataset, optimizer, criterion, epoch, device, config):
     model.train()
     epoch_start = time.time()
     total_loss = 0.0
-    
+
     for batch_idx in range(dataset.num_batches):
         batch_start = time.time()
-        
+
         # Generate synthetic data
         images, labels = dataset.generate_batch(device)
-        
+
         # Forward pass
         optimizer.zero_grad()
         outputs = model(images)
         loss = criterion(outputs, labels)
-        
+
         # Backward pass (gradients automatically synchronized)
         loss.backward()
-        
+
         # Update weights
         optimizer.step()
-        
+
         batch_time = time.time() - batch_start
         total_loss += loss.item()
-        
+
         # Print progress from rank 0
         if rank == 0 and (batch_idx + 1) % 20 == 0:
             avg_loss = total_loss / (batch_idx + 1)
             throughput = config.batch_size * world_size / batch_time
-            print(f"Epoch [{epoch+1}/{config.num_epochs}] "
-                  f"Batch [{batch_idx+1}/{dataset.num_batches}] "
-                  f"Loss: {loss.item():.4f} "
-                  f"Throughput: {throughput:.2f} samples/sec")
-    
+            print(
+                f"Epoch [{epoch+1}/{config.num_epochs}] "
+                f"Batch [{batch_idx+1}/{dataset.num_batches}] "
+                f"Loss: {loss.item():.4f} "
+                f"Throughput: {throughput:.2f} samples/sec"
+            )
+
     epoch_time = time.time() - epoch_start
     avg_loss = total_loss / dataset.num_batches
-    epoch_throughput = (dataset.num_batches * config.batch_size * world_size) / epoch_time
-    
+    epoch_throughput = (
+        dataset.num_batches * config.batch_size * world_size
+    ) / epoch_time
+
     return avg_loss, epoch_throughput
 
 
@@ -92,12 +96,12 @@ def main():
     """Main training function"""
     # Load configuration
     config = BenchmarkConfig()
-    
+
     print_header(config)
-    
+
     # Print distributed info
     print_distributed_info(rank, local_rank, world_size)
-    
+
     # Initialize distributed training
     if world_size > 1:
         print(f"\n[Rank {rank}] Initializing distributed process group...")
@@ -106,40 +110,43 @@ def main():
             backend="nccl",
             init_method=f"env://",  # Use environment variables (set by torchrun)
             world_size=world_size,
-            rank=rank
+            rank=rank,
         )
         print(f"[Rank {rank}] ✓ Process group initialized")
         print(f"[Rank {rank}]   Backend: {dist.get_backend()}")
         print(f"[Rank {rank}]   World Size: {dist.get_world_size()}")
     else:
         print(f"\n=== Running in Standalone Mode (Single GPU) ===")
-    
+
     # Set device
     if torch.cuda.is_available():
         num_gpus = torch.cuda.device_count()
         print(f"[Rank {rank}] PyTorch sees {num_gpus} GPU(s)")
-        print(f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}")
-        
+        print(
+            f"[Rank {rank}] LOCAL_RANK={local_rank}, attempting to use cuda:{local_rank}"
+        )
+
         if local_rank >= num_gpus:
-            print(f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}")
+            print(
+                f"[Rank {rank}] ERROR: LOCAL_RANK {local_rank} >= available GPUs {num_gpus}"
+            )
             print(f"[Rank {rank}] Using cuda:0 instead")
             device = torch.device("cuda:0")
         else:
             device = torch.device(f"cuda:{local_rank}")
-        
+
         torch.cuda.set_device(device)
         print_gpu_info(rank, device)
     else:
         device = torch.device("cpu")
         print(f"[Rank {rank}] Warning: CUDA not available, using CPU")
-    
+
     # Create model from helper module
     print(f"\n[Rank {rank}] Creating ResNet model from helper module...")
     model = ResNetModel(
-        num_classes=config.num_classes,
-        num_blocks=config.resnet_blocks
+        num_classes=config.num_classes, num_blocks=config.resnet_blocks
     ).to(device)
-    
+
     # Print model info
     if rank == 0:
         total_params, trainable_params = calculate_model_size(model)
@@ -147,7 +154,7 @@ def main():
         print(f"  Total Parameters: {total_params:,}")
         print(f"  Trainable Parameters: {trainable_params:,}")
         print(f"  Model Size: {total_params * 4 / 1e6:.2f} MB (FP32)")
-    
+
     # Wrap model with DDP for distributed training
     if world_size > 1:
         # Best practice: Explicitly specify device_ids for DDP
@@ -156,37 +163,37 @@ def main():
             device_ids=[local_rank],
             output_device=local_rank,
             broadcast_buffers=True,  # Ensure buffers (like BatchNorm stats) are synced
-            find_unused_parameters=False  # Set True only if needed (performance impact)
+            find_unused_parameters=False,  # Set True only if needed (performance impact)
         )
         print(f"[Rank {rank}] ✓ Model wrapped with DistributedDataParallel")
-    
+
     # Create dataset
     dataset = SyntheticDataset(
         num_samples=config.num_batches * config.batch_size,
         batch_size=config.batch_size,
         image_size=config.image_size,
-        num_classes=config.num_classes
+        num_classes=config.num_classes,
     )
-    
+
     # Create optimizer and loss function
     optimizer = torch.optim.SGD(
         model.parameters(),
         lr=config.learning_rate,
         momentum=config.momentum,
-        weight_decay=config.weight_decay
+        weight_decay=config.weight_decay,
     )
     criterion = nn.CrossEntropyLoss()
-    
+
     # Synchronize before training
     if world_size > 1:
         # Best practice: Specify device to avoid warnings
         dist.barrier(device_ids=[local_rank])
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("Starting Training")
         print(f"{'='*70}")
-    
+
     # Training loop
     all_throughputs = []
     for epoch in range(config.num_epochs):
@@ -194,19 +201,19 @@ def main():
             model, dataset, optimizer, criterion, epoch, device, config
         )
         all_throughputs.append(epoch_throughput)
-        
+
         if rank == 0:
             print(f"\nEpoch [{epoch+1}/{config.num_epochs}] Complete:")
             print(f"  Average Loss: {avg_loss:.4f}")
             print(f"  Throughput: {epoch_throughput:.2f} samples/sec")
-    
+
     # Calculate final metrics
     avg_throughput = sum(all_throughputs) / len(all_throughputs)
-    
+
     # Synchronize before final output
     if world_size > 1:
         dist.barrier(device_ids=[local_rank])
-    
+
     if rank == 0:
         print(f"\n{'='*70}")
         print("Training Complete")
@@ -216,7 +223,7 @@ def main():
         print(f"Number of GPUs: {world_size}")
         print(f"Model: ResNet with {sum(config.resnet_blocks)} blocks")
         print(f"{'='*70}")
-        
+
         # Save results
         with open("training_results_helper.txt", "w") as f:
             f.write(f"Training Results (with Helper Modules)\n")
@@ -227,16 +234,16 @@ def main():
             f.write(f"Epochs: {config.num_epochs}\n")
             f.write(f"Model: ResNet-{sum(config.resnet_blocks)*2+2}\n")
             f.write(f"Average Throughput: {avg_throughput:.2f} samples/sec\n")
-        
+
         # Output performance metric for madengine (REQUIRED FORMAT)
         print(f"\nperformance: {avg_throughput:.2f} samples_per_second")
-    
+
     # Cleanup
     if world_size > 1:
         dist.destroy_process_group()
         if rank == 0:
             print(f"✓ Process group destroyed")
-    
+
     return 0
 
 
@@ -246,5 +253,6 @@ def main():
     except Exception as e:
         print(f"[Rank {rank}] ✗ Error: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
diff --git a/tests/fixtures/dummy/scripts/dummy_vllm/run_vllm_inference.py b/tests/fixtures/dummy/scripts/dummy_vllm/run_vllm_inference.py
index f4c6a5f3..983709fc 100755
--- a/tests/fixtures/dummy/scripts/dummy_vllm/run_vllm_inference.py
+++ b/tests/fixtures/dummy/scripts/dummy_vllm/run_vllm_inference.py
@@ -14,11 +14,11 @@
   Multi-node: Use Ray backend with proper configuration
 """
 
+import argparse
 import os
+import socket
 import sys
 import time
-import argparse
-import socket
 from typing import List, Optional
 
 # Configure environment before importing vLLM
@@ -30,8 +30,8 @@
 os.environ.setdefault("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
 
 try:
-    from vllm import LLM, SamplingParams
     import torch
+    from vllm import LLM, SamplingParams
 except ImportError as e:
     print(f"Error importing required libraries: {e}")
     print("Please ensure vLLM and PyTorch are installed")
@@ -60,10 +60,10 @@ def print_header(args):
     print("vLLM V1 Engine Distributed Inference Benchmark")
     print("=" * 70)
     print(f"Hostname: {socket.gethostname()}")
-    
+
     nnodes = int(os.environ.get("NNODES", "1"))
     node_rank = int(os.environ.get("NODE_RANK", "0"))
-    
+
     print(f"Model: {args.model}")
     print(f"Tensor Parallel Size: {args.tensor_parallel_size}")
     print(f"Pipeline Parallel Size: {args.pipeline_parallel_size}")
@@ -96,19 +96,21 @@ def run_inference(args):
     print("\n" + "=" * 70)
     print("Initializing vLLM V1 Engine")
     print("=" * 70)
-    
+
     nnodes = int(os.environ.get("NNODES", "1"))
     node_rank = int(os.environ.get("NODE_RANK", "0"))
-    
+
     if args.distributed_backend == "auto":
         distributed_backend = "ray" if nnodes > 1 else None
     else:
-        distributed_backend = args.distributed_backend if args.distributed_backend != "none" else None
-    
+        distributed_backend = (
+            args.distributed_backend if args.distributed_backend != "none" else None
+        )
+
     # Use requested TP and PP (multi-node uses TP+PP from madengine env; no forced PP=1)
     effective_pipeline_size = args.pipeline_parallel_size
     effective_gpu_memory = 0.60 if args.pipeline_parallel_size > 1 else 0.85
-    
+
     if nnodes > 1 and distributed_backend == "ray":
         print("=" * 70)
         print("MULTI-NODE TP + PP (single Ray cluster)")
@@ -118,9 +120,9 @@ def run_inference(args):
         print(f"Pipeline Parallel Size: {effective_pipeline_size}")
         print(f"Total GPUs: {args.tensor_parallel_size * effective_pipeline_size}")
         print("=" * 70)
-    
+
     print(f"Using distributed backend: {distributed_backend or 'default'}")
-    
+
     # Initialize vLLM LLM engine with V1-specific settings
     try:
         llm_kwargs = {
@@ -133,59 +135,62 @@ def run_inference(args):
             "max_model_len": 2048,
             "disable_log_stats": True,  # Reduce logging noise
         }
-        
+
         # Add distributed backend if specified
         if distributed_backend:
             llm_kwargs["distributed_executor_backend"] = distributed_backend
-        
+
         # V1 engine specific: enforce_eager mode for compatibility
         if args.enforce_eager:
             llm_kwargs["enforce_eager"] = True
-        
+
         llm = LLM(**llm_kwargs)
         print("✓ vLLM V1 engine initialized successfully")
         if nnodes > 1:
-            print(f"✓ Multi-node TP={args.tensor_parallel_size} PP={effective_pipeline_size} (Ray)")
+            print(
+                f"✓ Multi-node TP={args.tensor_parallel_size} PP={effective_pipeline_size} (Ray)"
+            )
     except Exception as e:
         print(f"✗ Failed to initialize vLLM engine: {e}")
         import traceback
+
         traceback.print_exc()
         return 1
-    
+
     # Configure sampling parameters
     sampling_params = SamplingParams(
         temperature=TEMPERATURE,
         top_p=TOP_P,
         max_tokens=MAX_TOKENS,
     )
-    
+
     print(f"\n{'=' * 70}")
     print("Running Inference")
     print("=" * 70)
-    
+
     # Generate prompts
     prompts = generate_prompts(NUM_PROMPTS)
-    
+
     # Warmup run (not timed)
     print("\nWarmup: Running 10 prompts...")
     warmup_prompts = prompts[:10]
     _ = llm.generate(warmup_prompts, sampling_params)
     print("✓ Warmup complete")
-    
+
     # Benchmark run (timed)
     print(f"\nBenchmark: Running {NUM_PROMPTS} prompts...")
     start_time = time.time()
-    
+
     outputs = llm.generate(prompts, sampling_params)
-    
+
     end_time = time.time()
     elapsed_time = end_time - start_time
-    
+
     # Calculate metrics
     total_tokens = sum(len(output.outputs[0].token_ids) for output in outputs)
     throughput = NUM_PROMPTS / elapsed_time
     tokens_per_second = total_tokens / elapsed_time
-    
+
     print(f"\n{'=' * 70}")
     print("Benchmark Results")
     print("=" * 70)
@@ -195,9 +200,11 @@ def run_inference(args):
     print(f"Token generation: {tokens_per_second:.2f} tokens/second")
     print(f"Average latency: {(elapsed_time / NUM_PROMPTS) * 1000:.2f} ms/request")
     if nnodes > 1:
-        print(f"(Multi-node TP+PP: single replica across {args.tensor_parallel_size * effective_pipeline_size} GPUs)")
+        print(
+            f"(Multi-node TP+PP: single replica across {args.tensor_parallel_size * effective_pipeline_size} GPUs)"
+        )
     print("=" * 70)
-    
+
     # Print sample outputs
     print("\n" + "=" * 70)
     print("Sample Outputs (first 3)")
@@ -207,7 +214,7 @@ def run_inference(args):
         generated_text = output.outputs[0].text
         print(f"\n[Prompt {i+1}]: {prompt}")
         print(f"[Output {i+1}]: {generated_text[:200]}...")  # First 200 chars
-    
+
     # madengine output format
     print(f"\nperformance: {throughput:.2f} requests_per_second")
     print(f"tokens_per_second: {tokens_per_second:.2f}")
@@ -217,7 +224,7 @@ def run_inference(args):
     if nnodes > 1:
         print(f"nnodes: {nnodes}")
     print(f"distributed_backend: {distributed_backend or 'default'}")
-    
+
     return 0
 
 
@@ -230,47 +237,47 @@ def main():
         "--model",
         type=str,
         default=DEFAULT_MODEL,
-        help=f"Model name or path (default: {DEFAULT_MODEL})"
+        help=f"Model name or path (default: {DEFAULT_MODEL})",
     )
     parser.add_argument(
         "--tensor-parallel-size",
         type=int,
         default=1,
-        help="Number of GPUs for tensor parallelism (default: 1)"
+        help="Number of GPUs for tensor parallelism (default: 1)",
     )
     parser.add_argument(
         "--pipeline-parallel-size",
         type=int,
         default=1,
-        help="Number of nodes for pipeline parallelism (default: 1)"
+        help="Number of nodes for pipeline parallelism (default: 1)",
     )
     parser.add_argument(
         "--distributed-backend",
         type=str,
         choices=["auto", "ray", "mp", "none"],
         default="auto",
-        help="Distributed backend: auto (default), ray (multi-node), mp (multiprocessing), none"
+        help="Distributed backend: auto (default), ray (multi-node), mp (multiprocessing), none",
     )
     parser.add_argument(
         "--enforce-eager",
         action="store_true",
-        help="Disable CUDA graph for compatibility"
+        help="Disable CUDA graph for compatibility",
     )
-    
+
     args = parser.parse_args()
-    
+
     # Validate arguments
     if args.tensor_parallel_size < 1:
         print("Error: tensor-parallel-size must be >= 1")
         return 1
-    
+
     if args.pipeline_parallel_size < 1:
         print("Error: pipeline-parallel-size must be >= 1")
         return 1
-    
+
     # Print configuration
     print_header(args)
-    
+
     # Run inference benchmark
     return run_inference(args)
 
@@ -284,6 +291,6 @@ def main():
     except Exception as e:
         print(f"\nError: {e}", file=sys.stderr)
         import traceback
+
         traceback.print_exc()
         sys.exit(1)
-
diff --git a/tests/fixtures/dummy/scripts/pyt_huggingface_bert/run.sh b/tests/fixtures/dummy/scripts/pyt_huggingface_bert/run.sh
index 8693dc66..5e06316d 100644
--- a/tests/fixtures/dummy/scripts/pyt_huggingface_bert/run.sh
+++ b/tests/fixtures/dummy/scripts/pyt_huggingface_bert/run.sh
@@ -62,6 +62,6 @@ torchrun $HF_PATH/examples/pytorch/language-modeling/run_mlm.py \
 # output performance metric
 performance=$(cat log.txt | grep -Eo "train_samples_per_second':[^,]+" | sed "s/train_samples_per_second': //g" | head -n 1)
 
-# unset printing trace to not confuse Jenkinsfile 
+# unset printing trace to not confuse Jenkinsfile
 set +x
 echo "performance: $performance samples_per_second"
diff --git a/tests/fixtures/utils.py b/tests/fixtures/utils.py
index 53cd6938..3d8c399f 100644
--- a/tests/fixtures/utils.py
+++ b/tests/fixtures/utils.py
@@ -5,15 +5,16 @@
 
 # built-in modules
 import csv
+import json
 import os
 import re
 import shutil
 import subprocess
 import sys
-import json
-import pytest
 from unittest.mock import MagicMock
 
+import pytest
+
 MODEL_DIR = "tests/fixtures/dummy"
 BASE_DIR = os.path.join(os.path.dirname(__file__), "..", "..")
 sys.path.insert(1, BASE_DIR)
@@ -48,11 +49,11 @@ def has_gpu() -> bool:
         # This is safe for pytest collection and avoids hanging
         nvidia_exists = os.path.exists("/usr/bin/nvidia-smi")
         from madengine.core.constants import get_rocm_path
+
         rocm_path = get_rocm_path()
-        amd_rocm_exists = (
-            os.path.exists(os.path.join(rocm_path, "bin", "rocm-smi"))
-            or os.path.exists("/usr/local/bin/rocm-smi")
-        )
+        amd_rocm_exists = os.path.exists(
+            os.path.join(rocm_path, "bin", "rocm-smi")
+        ) or os.path.exists("/usr/local/bin/rocm-smi")
 
         _has_gpu_cache = nvidia_exists or amd_rocm_exists
 
@@ -89,18 +90,18 @@ def global_data():
 def clean_test_temp_files(request):
     """
     Fixture to clean up test temporary files and Docker containers.
-    
+
     Cleans up both before (to ensure clean state) and after (to avoid conflicts).
     """
     import subprocess
-    
+
     # Clean up Docker containers BEFORE test (ensure clean state)
     try:
         subprocess.run(
             "docker ps -a | grep 'container_ci-dummy' | awk '{print $1}' | xargs -r docker rm -f",
             shell=True,
             capture_output=True,
-            timeout=30
+            timeout=30,
         )
     except Exception:
         pass  # Ignore cleanup errors before test
@@ -115,14 +116,14 @@ def clean_test_temp_files(request):
                 shutil.rmtree(file_path)
             else:
                 os.remove(file_path)
-    
+
     # Clean up Docker containers AFTER test (avoid conflicts with next test)
     try:
         subprocess.run(
             "docker ps -a | grep 'container_ci-dummy' | awk '{print $1}' | xargs -r docker rm -f",
             shell=True,
             capture_output=True,
-            timeout=30
+            timeout=30,
         )
     except Exception:
         pass  # Ignore cleanup errors after test
@@ -184,13 +185,14 @@ def is_nvidia() -> bool:
         bool: True if NVIDIA GPU is present, False otherwise.
     """
     global _gpu_vendor_cache
-    
+
     if _gpu_vendor_cache is not None:
         return _gpu_vendor_cache == "NVIDIA"
-    
+
     try:
         # Lazy import to avoid collection issues
         from madengine.core.context import Context
+
         context = Context()
         _gpu_vendor_cache = context.ctx["gpu_vendor"]
         return _gpu_vendor_cache == "NVIDIA"
@@ -216,6 +218,7 @@ def get_gpu_arch() -> str:
 
     try:
         from madengine.core.console import Console
+
         console = Console(live_output=True)
         if is_nvidia():
             arch = console.sh(
@@ -224,9 +227,10 @@ def get_gpu_arch() -> str:
             arch = arch.strip() if arch else ""
             # Normalize "NVIDIA A100-SXM4-40GB" -> "A100", "NVIDIA H100 PCIe" -> "H100"
             if arch.startswith("NVIDIA "):
-                arch = arch[len("NVIDIA "):].split("-")[0].split()[0]
+                arch = arch[len("NVIDIA ") :].split("-")[0].split()[0]
         else:
             from madengine.core.constants import get_rocm_path
+
             rocm_path = get_rocm_path()
             arch = console.sh(f"{rocm_path}/bin/rocminfo |grep -o -m 1 'gfx.*'")
             arch = arch.strip() if arch else ""
@@ -244,17 +248,18 @@ def get_gpu_nodeid_map() -> dict:
         dict: GPU node id map mapping node_id strings to GPU indices.
     """
     global _gpu_nodeid_map_cache
-    
+
     if _gpu_nodeid_map_cache is not None:
         return _gpu_nodeid_map_cache
-    
+
     try:
         # Lazy import to avoid collection issues
         from madengine.core.console import Console
+
         gpu_map = {}
         console = Console(live_output=True)
         nvidia = is_nvidia()
-        
+
         if nvidia:
             command = "nvidia-smi --list-gpus"
             output = console.sh(command)
@@ -280,14 +285,18 @@ def get_gpu_nodeid_map() -> dict:
                     # Parse version as tuple for proper comparison (6.4.1 vs 6.4.0)
                     version_parts = rocm_version_str.split(".")
                     if len(version_parts) >= 3:
-                        rocm_version = tuple(int(p.split('-')[0]) for p in version_parts[:3])
+                        rocm_version = tuple(
+                            int(p.split("-")[0]) for p in version_parts[:3]
+                        )
                     else:
                         # Fallback to float comparison for versions without patch
                         rocm_version = (int(version_parts[0]), int(version_parts[1]), 0)
-                    
+
                     # Use appropriate rocm-smi command based on version (PR #54: threshold is 6.4.1)
                     command = (
-                        "rocm-smi --showuniqueid" if rocm_version < (6, 4, 1) else "rocm-smi --showhw"
+                        "rocm-smi --showuniqueid"
+                        if rocm_version < (6, 4, 1)
+                        else "rocm-smi --showhw"
                     )
                     output = console.sh(command)
                     lines = output.split("\n")
@@ -295,7 +304,9 @@ def get_gpu_nodeid_map() -> dict:
                     for line in lines:
                         if rocm_version < (6, 4, 1):
                             if "Unique ID:" in line:
-                                gpu_id = int(line.split(":")[0].split("[")[1].split("]")[0])
+                                gpu_id = int(
+                                    line.split(":")[0].split("[")[1].split("]")[0]
+                                )
                                 unique_id = line.split(":")[2].strip()
                                 gpu_map[unique_id] = gpu_id
                         else:
@@ -306,13 +317,22 @@ def get_gpu_nodeid_map() -> dict:
                 except Exception:
                     # If all else fails, return empty map
                     pass
-        
+
         _gpu_nodeid_map_cache = gpu_map
         return gpu_map
-    
+
     except Exception:
         # If detection fails during collection, return a default mapping
-        _gpu_nodeid_map_cache = {'2': 0, '3': 1, '4': 2, '5': 3, '6': 4, '7': 5, '8': 6, '9': 7}
+        _gpu_nodeid_map_cache = {
+            "2": 0,
+            "3": 1,
+            "4": 2,
+            "5": 3,
+            "6": 4,
+            "7": 5,
+            "8": 6,
+            "9": 7,
+        }
         return _gpu_nodeid_map_cache
 
 
@@ -323,10 +343,10 @@ def get_num_gpus() -> int:
         int: Number of GPUs present.
     """
     global _num_gpus_cache
-    
+
     if _num_gpus_cache is not None:
         return _num_gpus_cache
-    
+
     try:
         gpu_map = get_gpu_nodeid_map()
         _num_gpus_cache = len(gpu_map)
@@ -351,8 +371,11 @@ def get_num_cpus() -> int:
     try:
         # Lazy import to avoid collection issues
         from madengine.core.console import Console
+
         console = Console(live_output=True)
-        _num_cpus_cache = int(console.sh("lscpu | grep \"^CPU(s):\" | awk '{print $2}'"))
+        _num_cpus_cache = int(
+            console.sh("lscpu | grep \"^CPU(s):\" | awk '{print $2}'")
+        )
         return _num_cpus_cache
     except Exception:
         # Default to 64 CPUs if detection fails during collection
@@ -423,7 +446,9 @@ def assert_model_in_perf_csv(csv_path, model, status="SUCCESS", performance=None
                 pytest.fail(
                     f"model {model} in perf CSV did not run successfully (status={row.get('status')})."
                 )
-            if performance is not None and str(row.get("performance", "")) != str(performance):
+            if performance is not None and str(row.get("performance", "")) != str(
+                performance
+            ):
                 pytest.fail(
                     f"model {model} expected performance {performance}, got {row.get('performance')}."
                 )
diff --git a/tests/integration/test_container_execution.py b/tests/integration/test_container_execution.py
index b0d11b3b..f70ba574 100644
--- a/tests/integration/test_container_execution.py
+++ b/tests/integration/test_container_execution.py
@@ -8,19 +8,21 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
+import json
+
 # built-in modules
 import os
-import json
 import unittest.mock
-from unittest.mock import patch, MagicMock, mock_open
+from unittest.mock import MagicMock, mock_open, patch
 
 # third-party modules
 import pytest
 
+from madengine.core.console import Console
+from madengine.core.context import Context
+
 # project modules
 from madengine.execution.container_runner import ContainerRunner
-from madengine.core.context import Context
-from madengine.core.console import Console
 
 
 class TestContainerRunner:
@@ -188,14 +190,17 @@ def test_run_container_success(
         mock_sh.return_value = "hostname"
 
         # Mock log file with performance metrics
-        log_content = "Running test...\nperformance: 100.5 samples_per_second\nTest completed"
+        log_content = (
+            "Running test...\nperformance: 100.5 samples_per_second\nTest completed"
+        )
         mock_file.return_value.read.return_value = log_content
-        
+
         # Mock os.path.exists to return True for log file
         def exists_side_effect(path):
             if path.endswith(".live.log"):
                 return True
             return False
+
         mock_exists.side_effect = exists_side_effect
 
         model_info = {
diff --git a/tests/integration/test_docker_integration.py b/tests/integration/test_docker_integration.py
index 66455536..92d7396e 100644
--- a/tests/integration/test_docker_integration.py
+++ b/tests/integration/test_docker_integration.py
@@ -5,20 +5,22 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
+import json
+
 # built-in modules
 import os
-import json
 import shlex
 import tempfile
-from unittest.mock import patch, MagicMock, mock_open
+from unittest.mock import MagicMock, mock_open, patch
 
 # third-party modules
 import pytest
 
+from madengine.core.console import Console
+from madengine.core.context import Context
+
 # project modules
 from madengine.execution.docker_builder import DockerBuilder
-from madengine.core.context import Context
-from madengine.core.console import Console
 
 
 class TestDockerBuilder:
@@ -448,7 +450,10 @@ def test_export_build_manifest(
 
         # Set up some built images (key should match real DockerBuilder output)
         builder.built_images = {
-            "ci-model1": {"docker_image": "ci-model1", "dockerfile": "./docker/Dockerfile"}
+            "ci-model1": {
+                "docker_image": "ci-model1",
+                "dockerfile": "./docker/Dockerfile",
+            }
         }
 
         with patch("builtins.open", mock_open()) as mock_file:
@@ -777,8 +782,8 @@ def test_build_manifest_with_tagged_image(
         mock_vendor,
     ):
         """Test that build manifest includes registry_image when pushing to registry."""
-        import tempfile
         import os
+        import tempfile
 
         # Mock successful operations BEFORE creating Context
         # to avoid MagicMock objects being stored during initialization
diff --git a/tests/integration/test_gpu_management.py b/tests/integration/test_gpu_management.py
index 522fa038..95906050 100644
--- a/tests/integration/test_gpu_management.py
+++ b/tests/integration/test_gpu_management.py
@@ -28,13 +28,12 @@
 from madengine.core.console import Console
 
 
-
-
 def is_amd_gpu():
     """Check if system has AMD GPU."""
     try:
         import subprocess
-        result = subprocess.run(['rocm-smi'], capture_output=True, timeout=5)
+
+        result = subprocess.run(["rocm-smi"], capture_output=True, timeout=5)
         return result.returncode == 0
     except Exception:
         return False
@@ -44,226 +43,238 @@ def is_amd_gpu():
 # GPU Tool Manager Tests
 # ============================================================================
 
+
 class TestBaseGPUToolManager:
     """Test the base GPU tool manager abstract class."""
 
-    
-
 
 class TestROCmToolManager:
     """Test the ROCm tool manager with 6.4.1 threshold (PR #54)."""
-    
+
     def test_get_rocm_version_from_hipconfig(self):
         """Test ROCm version detection from hipconfig."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, 'is_tool_available', return_value=True), \
-             patch.object(manager, '_execute_shell_command') as mock_exec:
+
+        with patch.object(
+            manager, "is_tool_available", return_value=True
+        ), patch.object(manager, "_execute_shell_command") as mock_exec:
             mock_exec.return_value = (True, "6.4.1-12345", "")
-            
+
             version = manager.get_rocm_version()
-            
+
             assert version == (6, 4, 1)
             # Verify result is cached
             assert manager._get_cached_result("rocm_version") == (6, 4, 1)
-    
+
     def test_get_preferred_smi_tool_6_4_1_and_above(self):
         """Test that amd-smi is preferred for ROCm >= 6.4.1."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, 'get_rocm_version', return_value=(6, 4, 1)):
+
+        with patch.object(manager, "get_rocm_version", return_value=(6, 4, 1)):
             assert manager.get_preferred_smi_tool() == "amd-smi"
-        
-        with patch.object(manager, 'get_rocm_version', return_value=(6, 5, 0)):
+
+        with patch.object(manager, "get_rocm_version", return_value=(6, 5, 0)):
             assert manager.get_preferred_smi_tool() == "amd-smi"
-    
+
     def test_get_preferred_smi_tool_below_6_4_1(self):
         """Test that rocm-smi is preferred for ROCm < 6.4.1."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, 'get_rocm_version', return_value=(6, 4, 0)):
+
+        with patch.object(manager, "get_rocm_version", return_value=(6, 4, 0)):
             assert manager.get_preferred_smi_tool() == "rocm-smi"
-        
-        with patch.object(manager, 'get_rocm_version', return_value=(6, 3, 0)):
+
+        with patch.object(manager, "get_rocm_version", return_value=(6, 3, 0)):
             assert manager.get_preferred_smi_tool() == "rocm-smi"
-        
-        with patch.object(manager, 'get_rocm_version', return_value=(5, 7, 0)):
+
+        with patch.object(manager, "get_rocm_version", return_value=(5, 7, 0)):
             assert manager.get_preferred_smi_tool() == "rocm-smi"
-    
+
     def test_get_gpu_count_with_amd_smi(self):
         """Test GPU count detection using amd-smi."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, 'get_preferred_smi_tool', return_value="amd-smi"), \
-             patch.object(manager, 'execute_command', return_value="8"):
+
+        with patch.object(
+            manager, "get_preferred_smi_tool", return_value="amd-smi"
+        ), patch.object(manager, "execute_command", return_value="8"):
             count = manager.get_gpu_count()
-            
+
             assert count == 8
             # Verify caching
             assert manager._get_cached_result("gpu_count") == 8
-    
+
     def test_get_gpu_count_with_fallback_to_rocm_smi(self):
         """Test GPU count fallback from amd-smi to rocm-smi."""
         manager = ROCmToolManager()
-        
+
         def mock_execute(command, fallback=None, timeout=30):
             # Simulate amd-smi failure, rocm-smi success
             if "amd-smi" in command:
                 raise RuntimeError("amd-smi not found")
             return "4"
-        
-        with patch.object(manager, 'get_preferred_smi_tool', return_value="amd-smi"), \
-             patch.object(manager, 'execute_command', side_effect=mock_execute):
+
+        with patch.object(
+            manager, "get_preferred_smi_tool", return_value="amd-smi"
+        ), patch.object(manager, "execute_command", side_effect=mock_execute):
             # Should fallback successfully
-            with pytest.raises(RuntimeError):  # Our mock raises, but real impl would fallback
+            with pytest.raises(
+                RuntimeError
+            ):  # Our mock raises, but real impl would fallback
                 manager.get_gpu_count()
-    
+
     def test_get_gpu_product_name_with_fallback(self):
         """Test GPU product name with rocm-smi fallback (PR #54)."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, 'get_preferred_smi_tool', return_value="amd-smi"), \
-             patch.object(manager, 'execute_command', return_value="AMD Instinct MI300X"):
+
+        with patch.object(
+            manager, "get_preferred_smi_tool", return_value="amd-smi"
+        ), patch.object(manager, "execute_command", return_value="AMD Instinct MI300X"):
             product = manager.get_gpu_product_name(gpu_id=0)
-            
+
             assert product == "AMD Instinct MI300X"
-            assert manager._get_cached_result("gpu_product_name:0") == "AMD Instinct MI300X"
-    
+            assert (
+                manager._get_cached_result("gpu_product_name:0")
+                == "AMD Instinct MI300X"
+            )
+
     def test_get_gpu_architecture(self):
         """Test GPU architecture detection via rocminfo."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, '_execute_shell_command') as mock_exec:
+
+        with patch.object(manager, "_execute_shell_command") as mock_exec:
             mock_exec.return_value = (True, "gfx942", "")
-            
+
             arch = manager.get_gpu_architecture()
-            
+
             assert arch == "gfx942"
             assert manager._get_cached_result("gpu_architecture") == "gfx942"
-    
+
     def test_execute_command_with_fallback(self):
         """Test command execution with fallback mechanism."""
         manager = ROCmToolManager()
-        
-        with patch.object(manager, '_execute_shell_command') as mock_exec:
+
+        with patch.object(manager, "_execute_shell_command") as mock_exec:
             # First call fails, second succeeds
             mock_exec.side_effect = [
                 (False, "", "command not found"),
-                (True, "success", "")
+                (True, "success", ""),
             ]
-            
+
             result = manager.execute_command("primary_cmd", "fallback_cmd")
-            
+
             assert result == "success"
             assert mock_exec.call_count == 2
 
 
-
-
 class TestNvidiaToolManager:
     """Test the NVIDIA tool manager."""
-    
+
     def test_initialization(self):
         """Test NVIDIA tool manager initialization."""
         manager = NvidiaToolManager()
         assert manager is not None
-    
+
     def test_get_cuda_version_from_nvcc(self):
         """Test CUDA version detection from nvcc."""
         manager = NvidiaToolManager()
-        
-        with patch.object(manager, 'is_tool_available', return_value=True), \
-             patch.object(manager, '_execute_shell_command') as mock_exec:
+
+        with patch.object(
+            manager, "is_tool_available", return_value=True
+        ), patch.object(manager, "_execute_shell_command") as mock_exec:
             mock_exec.return_value = (True, "12.0", "")
-            
+
             version = manager.get_cuda_version()
-            
+
             assert version == "12.0"
             assert manager._get_cached_result("cuda_version") == "12.0"
-    
+
     def test_get_driver_version(self):
         """Test NVIDIA driver version detection."""
         manager = NvidiaToolManager()
-        
-        with patch.object(manager, 'is_tool_available', return_value=True), \
-             patch.object(manager, '_execute_shell_command') as mock_exec:
+
+        with patch.object(
+            manager, "is_tool_available", return_value=True
+        ), patch.object(manager, "_execute_shell_command") as mock_exec:
             mock_exec.return_value = (True, "525.60.13", "")
-            
+
             version = manager.get_driver_version()
-            
+
             assert version == "525.60.13"
-    
+
     def test_execute_nvidia_smi(self):
         """Test nvidia-smi execution."""
         manager = NvidiaToolManager()
-        
-        with patch.object(manager, 'is_tool_available', return_value=True), \
-             patch.object(manager, 'execute_command', return_value="GPU info"):
+
+        with patch.object(
+            manager, "is_tool_available", return_value=True
+        ), patch.object(manager, "execute_command", return_value="GPU info"):
             result = manager.execute_nvidia_smi("--list-gpus")
-            
+
             assert result == "GPU info"
-    
+
     def test_get_gpu_count(self):
         """Test NVIDIA GPU count detection."""
         manager = NvidiaToolManager()
-        
-        with patch.object(manager, 'execute_nvidia_smi', return_value="8"):
+
+        with patch.object(manager, "execute_nvidia_smi", return_value="8"):
             count = manager.get_gpu_count()
-            
+
             assert count == 8
-    
+
     def test_get_gpu_product_name(self):
         """Test NVIDIA GPU product name detection."""
         manager = NvidiaToolManager()
-        
-        with patch.object(manager, 'execute_nvidia_smi', return_value="NVIDIA H100 80GB HBM3"):
-            product = manager.get_gpu_product_name(gpu_id=0)
-            
-            assert product == "NVIDIA H100 80GB HBM3"
 
+        with patch.object(
+            manager, "execute_nvidia_smi", return_value="NVIDIA H100 80GB HBM3"
+        ):
+            product = manager.get_gpu_product_name(gpu_id=0)
 
+            assert product == "NVIDIA H100 80GB HBM3"
 
 
 class TestGPUToolFactory:
     """Test the GPU tool factory with singleton pattern."""
-    
+
     def setup_method(self):
         """Clear factory cache before each test."""
         clear_manager_cache()
-    
+
     def teardown_method(self):
         """Clear factory cache after each test."""
         clear_manager_cache()
-    
+
     def test_get_amd_manager(self):
         """Test getting AMD tool manager."""
-        with patch('madengine.utils.gpu_validator.detect_gpu_vendor', return_value=GPUVendor.AMD):
+        with patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor",
+            return_value=GPUVendor.AMD,
+        ):
             manager = get_gpu_tool_manager(GPUVendor.AMD)
-            
+
             assert isinstance(manager, ROCmToolManager)
-    
+
     def test_get_nvidia_manager(self):
         """Test getting NVIDIA tool manager."""
         manager = get_gpu_tool_manager(GPUVendor.NVIDIA)
-        
+
         assert isinstance(manager, NvidiaToolManager)
-    
+
     def test_singleton_pattern(self):
         """Test that factory returns same instance (singleton)."""
         manager1 = get_gpu_tool_manager(GPUVendor.AMD)
         manager2 = get_gpu_tool_manager(GPUVendor.AMD)
-        
+
         assert manager1 is manager2  # Same instance
-    
+
     def test_different_vendors_different_instances(self):
         """Test that different vendors get different instances."""
         amd_manager = get_gpu_tool_manager(GPUVendor.AMD)
         nvidia_manager = get_gpu_tool_manager(GPUVendor.NVIDIA)
-        
+
         assert amd_manager is not nvidia_manager
         assert isinstance(amd_manager, ROCmToolManager)
         assert isinstance(nvidia_manager, NvidiaToolManager)
-    
+
     def test_auto_detect_vendor(self):
         """Test auto-detection of GPU vendor."""
         with patch(
@@ -273,23 +284,23 @@ def test_auto_detect_vendor(self):
             manager = get_gpu_tool_manager(vendor=None)
 
             assert isinstance(manager, ROCmToolManager)
-    
+
     def test_unknown_vendor_raises_error(self):
         """Test that unknown vendor raises appropriate error."""
         with pytest.raises(ValueError, match="Unable to detect GPU vendor"):
             get_gpu_tool_manager(GPUVendor.UNKNOWN)
-    
+
     def test_clear_manager_cache(self):
         """Test clearing manager cache."""
         manager1 = get_gpu_tool_manager(GPUVendor.AMD)
-        
+
         clear_manager_cache()
-        
+
         manager2 = get_gpu_tool_manager(GPUVendor.AMD)
-        
+
         # After clearing cache, should get new instance
         assert manager1 is not manager2
-    
+
     def test_get_cached_managers(self):
         """Test getting dictionary of cached managers."""
         amd_manager = get_gpu_tool_manager(GPUVendor.AMD)
@@ -307,125 +318,124 @@ def test_get_cached_managers(self):
         assert cached[nvidia_keys[0]] is nvidia_manager
 
 
-
-
 class TestToolManagerIntegration:
     """Integration tests for tool managers with Context."""
-    
+
     def test_context_uses_tool_manager_for_gpu_count(self):
         """Test that Context uses tool manager for GPU count."""
         from madengine.core.context import Context
-        
-        additional_context = json.dumps({
-            "gpu_vendor": "AMD",
-            "guest_os": "UBUNTU"
-        })
-        
-        with patch('madengine.core.context.Context.get_gpu_vendor', return_value="AMD"), \
-             patch('madengine.core.context.Context._get_tool_manager') as mock_get_manager:
-            
+
+        additional_context = json.dumps({"gpu_vendor": "AMD", "guest_os": "UBUNTU"})
+
+        with patch(
+            "madengine.core.context.Context.get_gpu_vendor", return_value="AMD"
+        ), patch(
+            "madengine.core.context.Context._get_tool_manager"
+        ) as mock_get_manager:
+
             mock_manager = Mock()
             mock_manager.get_gpu_count.return_value = 8
             mock_get_manager.return_value = mock_manager
-            
+
             context = Context(
-                additional_context=additional_context,
-                build_only_mode=True
+                additional_context=additional_context, build_only_mode=True
             )
-            
+
             # Force initialization of docker_env_vars
             context.ctx["docker_env_vars"] = {"MAD_GPU_VENDOR": "AMD"}
-            
+
             count = context.get_system_ngpus()
-            
+
             assert count == 8
             mock_manager.get_gpu_count.assert_called_once()
-    
+
     def test_context_uses_tool_manager_for_product_name(self):
         """Test that Context uses tool manager for GPU product name (PR #54)."""
         from madengine.core.context import Context
-        
-        additional_context = json.dumps({
-            "gpu_vendor": "AMD",
-            "guest_os": "UBUNTU"
-        })
-        
-        with patch('madengine.core.context.Context._get_tool_manager') as mock_get_manager:
+
+        additional_context = json.dumps({"gpu_vendor": "AMD", "guest_os": "UBUNTU"})
+
+        with patch(
+            "madengine.core.context.Context._get_tool_manager"
+        ) as mock_get_manager:
             mock_manager = Mock()
             mock_manager.get_gpu_product_name.return_value = "AMD Instinct MI300X"
             mock_get_manager.return_value = mock_manager
-            
+
             context = Context(
-                additional_context=additional_context,
-                build_only_mode=True
+                additional_context=additional_context, build_only_mode=True
             )
-            
+
             context.ctx["docker_env_vars"] = {"MAD_GPU_VENDOR": "AMD"}
-            
+
             product = context.get_system_gpu_product_name()
-            
+
             assert product == "AMD Instinct MI300X"
             mock_manager.get_gpu_product_name.assert_called_once_with(gpu_id=0)
 
 
-
-
 class TestPR54Compliance:
     """Test compliance with PR #54 requirements."""
-    
+
     def test_rocm_version_threshold_is_6_4_1(self):
         """Test that ROCm version threshold matches PR #54."""
-        assert ROCM_VERSION_THRESHOLD == (6, 4, 1), \
-            "ROCm version threshold must be 6.4.1 as per PR #54"
-    
+        assert ROCM_VERSION_THRESHOLD == (
+            6,
+            4,
+            1,
+        ), "ROCm version threshold must be 6.4.1 as per PR #54"
+
     def test_amd_smi_preferred_for_6_4_1_and_above(self):
         """Test amd-smi is preferred for ROCm >= 6.4.1 (PR #54)."""
         manager = ROCmToolManager()
-        
+
         test_versions = [
             ((6, 4, 1), "amd-smi"),
             ((6, 4, 2), "amd-smi"),
             ((6, 5, 0), "amd-smi"),
             ((7, 0, 0), "amd-smi"),
         ]
-        
+
         for version, expected_tool in test_versions:
-            with patch.object(manager, 'get_rocm_version', return_value=version):
+            with patch.object(manager, "get_rocm_version", return_value=version):
                 tool = manager.get_preferred_smi_tool()
-                assert tool == expected_tool, \
-                    f"ROCm {version} should prefer {expected_tool}"
-    
+                assert (
+                    tool == expected_tool
+                ), f"ROCm {version} should prefer {expected_tool}"
+
     def test_rocm_smi_used_for_below_6_4_1(self):
         """Test rocm-smi is used for ROCm < 6.4.1 (PR #54)."""
         manager = ROCmToolManager()
-        
+
         test_versions = [
             ((6, 4, 0), "rocm-smi"),
             ((6, 3, 0), "rocm-smi"),
             ((6, 0, 0), "rocm-smi"),
             ((5, 7, 0), "rocm-smi"),
         ]
-        
+
         for version, expected_tool in test_versions:
-            with patch.object(manager, 'get_rocm_version', return_value=version):
+            with patch.object(manager, "get_rocm_version", return_value=version):
                 tool = manager.get_preferred_smi_tool()
-                assert tool == expected_tool, \
-                    f"ROCm {version} should use {expected_tool}"
-    
+                assert (
+                    tool == expected_tool
+                ), f"ROCm {version} should use {expected_tool}"
+
     def test_gpu_product_name_has_fallback(self):
         """Test GPU product name has rocm-smi fallback (PR #54)."""
         manager = ROCmToolManager()
-        
+
         # Verify the method supports fallback by checking it calls execute_command
-        with patch.object(manager, 'get_preferred_smi_tool', return_value="amd-smi"), \
-             patch.object(manager, 'execute_command') as mock_exec:
+        with patch.object(
+            manager, "get_preferred_smi_tool", return_value="amd-smi"
+        ), patch.object(manager, "execute_command") as mock_exec:
             mock_exec.return_value = "AMD Instinct MI300X"
-            
+
             product = manager.get_gpu_product_name(0)
-            
+
             # Verify execute_command was called (which has fallback logic)
             mock_exec.assert_called_once()
-            
+
             # Verify both amd-smi and rocm-smi commands are in the call
             call_args = mock_exec.call_args
             assert "amd-smi" in str(call_args) or "rocm-smi" in str(call_args)
@@ -435,12 +445,11 @@ def test_gpu_product_name_has_fallback(self):
     pytest.main([__file__, "-v"])
 
 
-
-
 # ============================================================================
 # GPU RenderD Nodes Tests
 # ============================================================================
 
+
 class TestGetGpuRenderDNodesIntegration:
     """Integration test suite for the get_gpu_renderD_nodes method using real hardware."""
 
@@ -449,13 +458,19 @@ def test_returns_none_for_non_amd_gpu(self):
         """Test that the function returns None for non-AMD GPUs."""
         from unittest.mock import patch
 
-        with patch.object(Context, "get_gpu_vendor", return_value="NVIDIA"), \
-             patch.object(Context, "get_system_ngpus", return_value=0), \
-             patch.object(Context, "get_system_gpu_architecture", return_value=""), \
-             patch.object(Context, "get_system_gpu_product_name", return_value=""), \
-             patch.object(Context, "get_system_hip_version", return_value="5.0"), \
-             patch.object(Context, "get_docker_gpus", return_value="0"), \
-             patch.object(Context, "get_gpu_renderD_nodes", return_value=None):
+        with patch.object(
+            Context, "get_gpu_vendor", return_value="NVIDIA"
+        ), patch.object(Context, "get_system_ngpus", return_value=0), patch.object(
+            Context, "get_system_gpu_architecture", return_value=""
+        ), patch.object(
+            Context, "get_system_gpu_product_name", return_value=""
+        ), patch.object(
+            Context, "get_system_hip_version", return_value="5.0"
+        ), patch.object(
+            Context, "get_docker_gpus", return_value="0"
+        ), patch.object(
+            Context, "get_gpu_renderD_nodes", return_value=None
+        ):
             context = Context()
 
         # Should return None for non-AMD GPUs
@@ -466,139 +481,149 @@ def test_returns_none_for_non_amd_gpu(self):
     def test_returns_list_for_amd_gpu(self):
         """Test that the function returns a list of renderD nodes for AMD GPUs."""
         context = Context()
-        
+
         # Should return a list for AMD GPUs
-        assert context.ctx['gpu_renderDs'] is not None
-        assert isinstance(context.ctx['gpu_renderDs'], list)
-        
+        assert context.ctx["gpu_renderDs"] is not None
+        assert isinstance(context.ctx["gpu_renderDs"], list)
+
         # List should not be empty if there are GPUs
-        if context.ctx['docker_env_vars']['MAD_SYSTEM_NGPUS'] > 0:
-            assert len(context.ctx['gpu_renderDs']) > 0
+        if context.ctx["docker_env_vars"]["MAD_SYSTEM_NGPUS"] > 0:
+            assert len(context.ctx["gpu_renderDs"]) > 0
 
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_count_matches_gpu_count(self):
         """Test that the number of renderD nodes matches the number of GPUs."""
         context = Context()
-        
+
         # Get GPU count from context (which uses amd-smi list --csv or rocm-smi as fallback)
         # This is more reliable than amd-smi list -e --json which only works on ROCm 6.4+
-        expected_gpu_count = context.ctx['docker_env_vars']['MAD_SYSTEM_NGPUS']
-        
+        expected_gpu_count = context.ctx["docker_env_vars"]["MAD_SYSTEM_NGPUS"]
+
         # Skip test if no GPUs detected
         if expected_gpu_count == 0:
             pytest.skip("No GPUs detected on system")
-        
+
         # The number of renderD nodes should match the number of GPUs
-        assert len(context.ctx['gpu_renderDs']) == expected_gpu_count
-        
+        assert len(context.ctx["gpu_renderDs"]) == expected_gpu_count
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_values_are_valid(self):
         """Test that all renderD values are valid integers."""
         context = Context()
-        
+
         # All renderD values should be positive integers
-        for renderD in context.ctx['gpu_renderDs']:
+        for renderD in context.ctx["gpu_renderDs"]:
             assert isinstance(renderD, int)
             assert renderD > 0
-            
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_nodes_are_unique(self):
         """Test that all renderD nodes are unique."""
         context = Context()
-        
-        renderDs = context.ctx['gpu_renderDs']
+
+        renderDs = context.ctx["gpu_renderDs"]
         # All renderD values should be unique
         assert len(renderDs) == len(set(renderDs))
-        
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_values_match_kfd_properties(self):
         """Test that renderD values match what's in KFD properties."""
         console = Console()
         context = Context()
-        
+
         # Get renderD values from KFD directly
         try:
-            kfd_output = console.sh("grep -r drm_render_minor /sys/devices/virtual/kfd/kfd/topology/nodes")
+            kfd_output = console.sh(
+                "grep -r drm_render_minor /sys/devices/virtual/kfd/kfd/topology/nodes"
+            )
             kfd_lines = [line for line in kfd_output.split("\n") if line.strip()]
             # Filter out CPU entries (renderD value 0)
-            kfd_renderDs = [int(line.split()[-1]) for line in kfd_lines if int(line.split()[-1]) != 0]
+            kfd_renderDs = [
+                int(line.split()[-1])
+                for line in kfd_lines
+                if int(line.split()[-1]) != 0
+            ]
         except Exception:
             pytest.skip("Unable to read KFD properties")
-        
+
         # The renderD values from context should be a subset of KFD renderDs
-        for renderD in context.ctx['gpu_renderDs']:
-            assert renderD in kfd_renderDs, f"renderD {renderD} not found in KFD properties"
-    
+        for renderD in context.ctx["gpu_renderDs"]:
+            assert (
+                renderD in kfd_renderDs
+            ), f"renderD {renderD} not found in KFD properties"
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_gpu_ordering_is_consistent(self):
         """Test that GPU ordering matches amd-smi GPU IDs."""
         console = Console()
         context = Context()
-        
+
         try:
             # Get amd-smi data
             amd_smi_output = console.sh("amd-smi list -e --json")
             gpu_data = json.loads(amd_smi_output)
-            
+
             # Sort by GPU ID
             sorted_gpus = sorted(gpu_data, key=lambda x: x["gpu"])
-            
+
             # The number of GPUs should match
-            assert len(context.ctx['gpu_renderDs']) == len(sorted_gpus)
-            
+            assert len(context.ctx["gpu_renderDs"]) == len(sorted_gpus)
+
         except Exception:
             pytest.skip("Unable to verify GPU ordering with amd-smi")
-    
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_nodes_exist_in_dev(self):
         """Test that the renderD nodes actually exist in /dev/dri/."""
         context = Context()
-        
+
         # Check that each renderD node exists as a device file
-        for renderD in context.ctx['gpu_renderDs']:
+        for renderD in context.ctx["gpu_renderDs"]:
             dev_path = f"/dev/dri/renderD{renderD}"
             assert os.path.exists(dev_path), f"Device {dev_path} does not exist"
             # Should be a character device
-            assert stat.S_ISCHR(os.stat(dev_path).st_mode), f"{dev_path} is not a character device"
-    
+            assert stat.S_ISCHR(
+                os.stat(dev_path).st_mode
+            ), f"{dev_path} is not a character device"
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_no_cpu_entries_in_renderDs(self):
         """Test that CPU entries (renderD=0) are not included."""
         context = Context()
-        
+
         # None of the renderD values should be 0 (CPUs)
-        for renderD in context.ctx['gpu_renderDs']:
+        for renderD in context.ctx["gpu_renderDs"]:
             assert renderD != 0, "CPU entry (renderD=0) found in GPU renderD list"
-    
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_context_initialization_succeeds(self):
         """Test that Context initialization succeeds with real GPU data."""
         # This should not raise any exceptions
         context = Context()
-        
+
         # Basic sanity checks
         assert context.ctx is not None
-        assert 'gpu_renderDs' in context.ctx
-        assert 'docker_env_vars' in context.ctx
-        assert 'MAD_GPU_VENDOR' in context.ctx['docker_env_vars']
-        
+        assert "gpu_renderDs" in context.ctx
+        assert "docker_env_vars" in context.ctx
+        assert "MAD_GPU_VENDOR" in context.ctx["docker_env_vars"]
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_mapping_is_reproducible(self):
         """Test that creating multiple Context objects produces the same renderD mapping."""
         context1 = Context()
         context2 = Context()
-        
+
         # The renderD lists should be identical
-        assert context1.ctx['gpu_renderDs'] == context2.ctx['gpu_renderDs']
-        
+        assert context1.ctx["gpu_renderDs"] == context2.ctx["gpu_renderDs"]
+
     @pytest.mark.skipif(not is_amd_gpu(), reason="Test requires AMD GPU")
     def test_renderD_values_are_in_valid_range(self):
         """Test that renderD values are in the valid Linux device range."""
         context = Context()
-        
+
         # renderD values typically start at 128 and go up
         # Valid range is 128-255 for render nodes
-        for renderD in context.ctx['gpu_renderDs']:
-            assert 128 <= renderD <= 255, f"renderD {renderD} is outside valid range [128, 255]"
-
-
+        for renderD in context.ctx["gpu_renderDs"]:
+            assert (
+                128 <= renderD <= 255
+            ), f"renderD {renderD} is outside valid range [128, 255]"
diff --git a/tests/integration/test_orchestrator_workflows.py b/tests/integration/test_orchestrator_workflows.py
index 143e56e5..09264d80 100644
--- a/tests/integration/test_orchestrator_workflows.py
+++ b/tests/integration/test_orchestrator_workflows.py
@@ -18,15 +18,15 @@
 
 # project modules
 from madengine.cli import app
+from madengine.core.errors import BuildError, ConfigurationError, DiscoveryError
 from madengine.orchestration.build_orchestrator import BuildOrchestrator
 from madengine.orchestration.run_orchestrator import RunOrchestrator
-from madengine.core.errors import BuildError, ConfigurationError, DiscoveryError
-
 
 # ============================================================================
 # Batch manifest (CLI build options)
 # ============================================================================
 
+
 class TestBatchManifestBuildIntegration:
     """Batch manifest and --tags are mutually exclusive."""
 
@@ -42,9 +42,12 @@ def test_batch_manifest_mutually_exclusive_with_tags(self):
                 app,
                 [
                     "build",
-                    "--batch-manifest", batch_file,
-                    "--tags", "dummy",
-                    "--additional-context", '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}',
+                    "--batch-manifest",
+                    batch_file,
+                    "--tags",
+                    "dummy",
+                    "--additional-context",
+                    '{"gpu_vendor": "AMD", "guest_os": "UBUNTU"}',
                 ],
             )
             assert result.exit_code != 0
@@ -57,6 +60,7 @@ def test_batch_manifest_mutually_exclusive_with_tags(self):
 # Build orchestrator
 # ============================================================================
 
+
 class TestBuildOrchestrator:
     """Test the Build Orchestrator module."""
 
@@ -163,7 +167,9 @@ def test_build_execute_success(
 
         # Mock context
         mock_context = MagicMock()
-        mock_context.ctx = {"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a"}}
+        mock_context.ctx = {
+            "docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a"}
+        }
         mock_context_class.return_value = mock_context
 
         # Mock discover models
@@ -356,9 +362,7 @@ def test_run_execute_no_manifest_no_tags(self, mock_exists):
             orchestrator.execute(manifest_file=None, tags=None)
 
     @patch("madengine.orchestration.build_orchestrator.BuildOrchestrator")
-    def test_run_execute_triggers_build_phase(
-        self, mock_build_orchestrator
-    ):
+    def test_run_execute_triggers_build_phase(self, mock_build_orchestrator):
         """Test run execution triggers build phase when no manifest exists."""
         mock_args = MagicMock()
         mock_args.additional_context = None
@@ -378,9 +382,13 @@ def test_run_execute_triggers_build_phase(
         orchestrator = RunOrchestrator(mock_args)
 
         # Mock file operations and execution
-        with patch("os.path.exists", side_effect=lambda p: p == "build_manifest.json"), \
-             patch("builtins.open", mock_open(read_data=json.dumps(manifest_data))), \
-             patch.object(orchestrator, "_execute_local", return_value={}) as mock_execute_local:
+        with patch(
+            "os.path.exists", side_effect=lambda p: p == "build_manifest.json"
+        ), patch(
+            "builtins.open", mock_open(read_data=json.dumps(manifest_data))
+        ), patch.object(
+            orchestrator, "_execute_local", return_value={}
+        ) as mock_execute_local:
             orchestrator.execute(manifest_file=None, tags=["test"])
 
         mock_build_instance.execute.assert_called_once()
@@ -442,9 +450,7 @@ def test_run_execute_distributed(self, mock_exists, mock_file):
         read_data='{"built_images": {"model1": {"name": "model1"}}, "context": {}}',
     )
     @patch("os.path.exists", return_value=True)
-    def test_execute_local_with_mock(
-        self, mock_exists, mock_file
-    ):
+    def test_execute_local_with_mock(self, mock_exists, mock_file):
         """Test local execution workflow (mocked)."""
         mock_args = MagicMock()
         mock_args.additional_context = '{"deploy": "local"}'
@@ -470,9 +476,20 @@ def test_filter_images_by_gpu_architecture(self):
         orchestrator = RunOrchestrator(mock_args)
 
         built_images = {
-            "model1": {"name": "model1", "gpu_architecture": "gfx90a", "gpu_vendor": "AMD"},
-            "model2": {"name": "model2", "gpu_architecture": "gfx908", "gpu_vendor": "AMD"},
-            "model3": {"name": "model3", "gpu_architecture": ""},  # Legacy - no gpu_vendor
+            "model1": {
+                "name": "model1",
+                "gpu_architecture": "gfx90a",
+                "gpu_vendor": "AMD",
+            },
+            "model2": {
+                "name": "model2",
+                "gpu_architecture": "gfx908",
+                "gpu_vendor": "AMD",
+            },
+            "model3": {
+                "name": "model3",
+                "gpu_architecture": "",
+            },  # Legacy - no gpu_vendor
         }
 
         # Filter for gfx90a
@@ -483,4 +500,3 @@ def test_filter_images_by_gpu_architecture(self):
         assert "model1" in compatible
         assert "model2" not in compatible
         assert "model3" in compatible  # Legacy images without gpu_vendor pass through
-
diff --git a/tests/integration/test_platform_integration.py b/tests/integration/test_platform_integration.py
index 82519a89..c13b031e 100644
--- a/tests/integration/test_platform_integration.py
+++ b/tests/integration/test_platform_integration.py
@@ -10,25 +10,26 @@
 import json
 import os
 from pathlib import Path
-from unittest.mock import MagicMock, patch, mock_open
+from unittest.mock import MagicMock, mock_open, patch
+
 import pytest
 
+from madengine.core.errors import BuildError
 from madengine.execution.docker_builder import DockerBuilder
 from madengine.execution.dockerfile_utils import (
-    parse_dockerfile_gpu_variables,
-    normalize_architecture_name,
-    is_target_arch_compatible_with_variable,
     is_compilation_arch_compatible,
+    is_target_arch_compatible_with_variable,
+    normalize_architecture_name,
+    parse_dockerfile_gpu_variables,
 )
 from madengine.orchestration.build_orchestrator import BuildOrchestrator
 from madengine.orchestration.run_orchestrator import RunOrchestrator
-from madengine.core.errors import BuildError
-
 
 # ============================================================================
 # Multi-Platform Build Tests
 # ============================================================================
 
+
 class TestMultiPlatformBuild:
     """Test build orchestration across different platforms."""
 
@@ -44,14 +45,16 @@ def test_build_initialization_all_platforms(
         ):
             with patch("os.path.exists", return_value=False):
                 orchestrator = BuildOrchestrator(mock_build_args)
-                
+
                 assert orchestrator.args == mock_build_args
                 assert orchestrator.context == multi_platform_context
                 assert orchestrator.credentials is None
 
     @pytest.mark.unit
     @pytest.mark.amd
-    def test_build_amd_gpu_architecture_detection(self, amd_gpu_context, mock_build_args):
+    def test_build_amd_gpu_architecture_detection(
+        self, amd_gpu_context, mock_build_args
+    ):
         """Test AMD GPU architecture is correctly detected and used."""
         with patch(
             "madengine.orchestration.build_orchestrator.Context",
@@ -59,7 +62,7 @@ def test_build_amd_gpu_architecture_detection(self, amd_gpu_context, mock_build_
         ):
             with patch("os.path.exists", return_value=False):
                 orchestrator = BuildOrchestrator(mock_build_args)
-                
+
                 assert orchestrator.context.get_gpu_vendor() == "AMD"
                 assert orchestrator.context.get_system_gpu_architecture() == "gfx90a"
 
@@ -75,7 +78,7 @@ def test_build_nvidia_gpu_architecture_detection(
         ):
             with patch("os.path.exists", return_value=False):
                 orchestrator = BuildOrchestrator(mock_build_args)
-                
+
                 assert orchestrator.context.get_gpu_vendor() == "NVIDIA"
                 assert orchestrator.context.get_system_gpu_architecture() == "sm_90"
 
@@ -89,7 +92,7 @@ def test_build_cpu_only_mode(self, cpu_context, mock_build_args):
         ):
             with patch("os.path.exists", return_value=False):
                 orchestrator = BuildOrchestrator(mock_build_args)
-                
+
                 assert orchestrator.context.get_gpu_vendor() == "NONE"
                 assert orchestrator.context.get_system_ngpus() == 0
 
@@ -98,6 +101,7 @@ def test_build_cpu_only_mode(self, cpu_context, mock_build_args):
 # Error Handling and Resilience Tests
 # ============================================================================
 
+
 class TestBuildResilience:
     """Test build resilience and error handling."""
 
@@ -141,7 +145,9 @@ def test_partial_build_failure_saves_manifest(
                             mock_builder_instance.export_build_manifest.assert_called_once()
 
                             # Verify successful builds are available
-                            summary = mock_builder_instance.build_all_models.return_value
+                            summary = (
+                                mock_builder_instance.build_all_models.return_value
+                            )
                             assert len(summary["successful_builds"]) == 1
                             assert len(summary["failed_builds"]) == 1
 
@@ -244,6 +250,7 @@ def test_multi_model_build_continues_on_single_failure(
 # Multi-Architecture Build Tests
 # ============================================================================
 
+
 class TestMultiArchitectureBuild:
     """Test multi-architecture build scenarios."""
 
@@ -302,7 +309,9 @@ def test_multi_arch_amd_builds(self, mock_build_args, amd_gpu_context):
                             manifest_file = orchestrator.execute()
 
                             # Verify all architectures were built
-                            summary = mock_builder_instance.build_all_models.return_value
+                            summary = (
+                                mock_builder_instance.build_all_models.return_value
+                            )
                             assert len(summary["successful_builds"]) == 3
                             archs = [
                                 b["gpu_architecture"]
@@ -317,6 +326,7 @@ def test_multi_arch_amd_builds(self, mock_build_args, amd_gpu_context):
 # Run Orchestrator Multi-Platform Tests
 # ============================================================================
 
+
 class TestMultiPlatformRun:
     """Test run orchestration across different platforms."""
 
@@ -397,6 +407,7 @@ def test_run_multi_model_continues_on_failure(
 # Integration Tests (Full Flow)
 # ============================================================================
 
+
 class TestEndToEndIntegration:
     """Integration tests for complete build + run workflows."""
 
@@ -465,7 +476,9 @@ def test_build_then_run_workflow(
                         "total_runs": 1,
                     },
                 ):
-                    result = run_orchestrator.execute(manifest_file="build_manifest.json")
+                    result = run_orchestrator.execute(
+                        manifest_file="build_manifest.json"
+                    )
 
                     assert len(result["successful_runs"]) == 1
                     assert len(result["failed_runs"]) == 0
@@ -475,6 +488,7 @@ def test_build_then_run_workflow(
 # Platform-Specific Behavior Tests
 # ============================================================================
 
+
 class TestPlatformSpecificBehavior:
     """Test platform-specific behaviors and edge cases."""
 
@@ -553,6 +567,7 @@ def test_cpu_only_execution(self, cpu_context, mock_run_args, temp_manifest_file
 # Multi-GPU architecture (Dockerfile parsing, normalization, image filtering)
 # ============================================================================
 
+
 class TestMultiGPUArch:
     """Multi-arch DockerBuilder logic, dockerfile_utils, and run-phase image filtering."""
 
@@ -573,27 +588,46 @@ def setup_method(self):
     @patch.object(DockerBuilder, "_get_dockerfiles_for_model")
     @patch.object(DockerBuilder, "_check_dockerfile_has_gpu_variables")
     @patch.object(DockerBuilder, "build_image")
-    def test_multi_arch_build_image_naming(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles):
+    def test_multi_arch_build_image_naming(
+        self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles
+    ):
         model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"}
         mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"]
         mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile")
-        mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", "build_duration": 1.0}
-        result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None)
+        mock_build_image.return_value = {
+            "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908",
+            "build_duration": 1.0,
+        }
+        result = self.builder._build_model_for_arch(
+            model_info, "gfx908", None, False, None, "", None
+        )
         assert result[0]["docker_image"].endswith("_gfx908")
         mock_check_gpu_vars.return_value = (False, "docker/dummy.Dockerfile")
-        mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}
-        result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None)
+        mock_build_image.return_value = {
+            "docker_image": "ci-dummy_dummy.ubuntu.amd",
+            "build_duration": 1.0,
+        }
+        result = self.builder._build_model_for_arch(
+            model_info, "gfx908", None, False, None, "", None
+        )
         assert not result[0]["docker_image"].endswith("_gfx908")
 
     @patch.object(DockerBuilder, "_get_dockerfiles_for_model")
     @patch.object(DockerBuilder, "_check_dockerfile_has_gpu_variables")
     @patch.object(DockerBuilder, "build_image")
-    def test_multi_arch_manifest_fields(self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles):
+    def test_multi_arch_manifest_fields(
+        self, mock_build_image, mock_check_gpu_vars, mock_get_dockerfiles
+    ):
         model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"}
         mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"]
         mock_check_gpu_vars.return_value = (True, "docker/dummy.Dockerfile")
-        mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908", "build_duration": 1.0}
-        result = self.builder._build_model_for_arch(model_info, "gfx908", None, False, None, "", None)
+        mock_build_image.return_value = {
+            "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx908",
+            "build_duration": 1.0,
+        }
+        result = self.builder._build_model_for_arch(
+            model_info, "gfx908", None, False, None, "", None
+        )
         assert result[0]["gpu_architecture"] == "gfx908"
 
     @patch.object(DockerBuilder, "_get_dockerfiles_for_model")
@@ -601,17 +635,31 @@ def test_multi_arch_manifest_fields(self, mock_build_image, mock_check_gpu_vars,
     def test_legacy_single_arch_build(self, mock_build_image, mock_get_dockerfiles):
         model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"}
         mock_get_dockerfiles.return_value = ["docker/dummy.Dockerfile"]
-        mock_build_image.return_value = {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}
-        result = self.builder._build_model_single_arch(model_info, None, False, None, "", None)
+        mock_build_image.return_value = {
+            "docker_image": "ci-dummy_dummy.ubuntu.amd",
+            "build_duration": 1.0,
+        }
+        result = self.builder._build_model_single_arch(
+            model_info, None, False, None, "", None
+        )
         assert result[0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd"
 
     @patch.object(DockerBuilder, "_build_model_single_arch")
     def test_additional_context_overrides_target_archs(self, mock_single_arch):
-        self.context.ctx = {"docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx908"}}
+        self.context.ctx = {
+            "docker_build_arg": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx908"}
+        }
         model_info = {"name": "dummy", "dockerfile": "docker/dummy.Dockerfile"}
-        mock_single_arch.return_value = [{"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}]
-        result = self.builder.build_all_models([model_info], target_archs=["gfx908", "gfx90a"])
-        assert result["successful_builds"][0]["docker_image"] == "ci-dummy_dummy.ubuntu.amd"
+        mock_single_arch.return_value = [
+            {"docker_image": "ci-dummy_dummy.ubuntu.amd", "build_duration": 1.0}
+        ]
+        result = self.builder.build_all_models(
+            [model_info], target_archs=["gfx908", "gfx90a"]
+        )
+        assert (
+            result["successful_builds"][0]["docker_image"]
+            == "ci-dummy_dummy.ubuntu.amd"
+        )
 
     def test_parse_dockerfile_gpu_variables(self):
         content = """
@@ -640,20 +688,41 @@ def test_parse_malformed_dockerfile(self):
 
     def test_normalize_architecture_name(self):
         cases = {
-            "gfx908": "gfx908", "GFX908": "gfx908", "mi100": "gfx908", "mi-100": "gfx908",
-            "mi200": "gfx90a", "mi-200": "gfx90a", "mi210": "gfx90a", "mi250": "gfx90a",
-            "mi300": "gfx940", "mi-300": "gfx940", "mi300a": "gfx940",
-            "mi300x": "gfx942", "mi-300x": "gfx942", "unknown": "unknown", "": None,
+            "gfx908": "gfx908",
+            "GFX908": "gfx908",
+            "mi100": "gfx908",
+            "mi-100": "gfx908",
+            "mi200": "gfx90a",
+            "mi-200": "gfx90a",
+            "mi210": "gfx90a",
+            "mi250": "gfx90a",
+            "mi300": "gfx940",
+            "mi-300": "gfx940",
+            "mi300a": "gfx940",
+            "mi300x": "gfx942",
+            "mi-300x": "gfx942",
+            "unknown": "unknown",
+            "": None,
         }
         for inp, expected in cases.items():
             assert normalize_architecture_name(inp) == expected
 
     def test_is_target_arch_compatible_with_variable(self):
-        assert is_target_arch_compatible_with_variable("MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx908"], "gfx942")
-        assert is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908", "gfx942"], "gfx942")
-        assert not is_target_arch_compatible_with_variable("PYTORCH_ROCM_ARCH", ["gfx908"], "gfx942")
-        assert is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx908")
-        assert not is_target_arch_compatible_with_variable("GFX_COMPILATION_ARCH", ["gfx908"], "gfx942")
+        assert is_target_arch_compatible_with_variable(
+            "MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx908"], "gfx942"
+        )
+        assert is_target_arch_compatible_with_variable(
+            "PYTORCH_ROCM_ARCH", ["gfx908", "gfx942"], "gfx942"
+        )
+        assert not is_target_arch_compatible_with_variable(
+            "PYTORCH_ROCM_ARCH", ["gfx908"], "gfx942"
+        )
+        assert is_target_arch_compatible_with_variable(
+            "GFX_COMPILATION_ARCH", ["gfx908"], "gfx908"
+        )
+        assert not is_target_arch_compatible_with_variable(
+            "GFX_COMPILATION_ARCH", ["gfx908"], "gfx942"
+        )
         assert is_target_arch_compatible_with_variable("UNKNOWN_VAR", ["foo"], "bar")
 
     def test_is_compilation_arch_compatible(self):
@@ -664,4 +733,3 @@ def test_is_compilation_arch_compatible(self):
 
 if __name__ == "__main__":
     pytest.main([__file__, "-v", "--tb=short"])
-
diff --git a/tests/integration/test_profiling_tools_config.py b/tests/integration/test_profiling_tools_config.py
index 7dc36f94..4659b608 100644
--- a/tests/integration/test_profiling_tools_config.py
+++ b/tests/integration/test_profiling_tools_config.py
@@ -27,7 +27,9 @@ def test_rocm_trace_lite_config_and_apply_tools():
     assert cfg_default["env_vars"].get("RTL_MODE") == "default"
     assert cfg_default["cmd"] == cfg["cmd"]
 
-    wrapper = get_madengine_root() / "scripts" / "common" / "tools" / "rtl_trace_wrapper.sh"
+    wrapper = (
+        get_madengine_root() / "scripts" / "common" / "tools" / "rtl_trace_wrapper.sh"
+    )
     assert wrapper.is_file()
 
     ctx = MagicMock()
diff --git a/tests/unit/test_additional_context_defaults.py b/tests/unit/test_additional_context_defaults.py
index f42d9c2e..10964ec8 100644
--- a/tests/unit/test_additional_context_defaults.py
+++ b/tests/unit/test_additional_context_defaults.py
@@ -1,8 +1,8 @@
 """Tests for madengine.core.additional_context_defaults."""
 
 from madengine.core.additional_context_defaults import (
-    DEFAULT_GUEST_OS,
     DEFAULT_GPU_VENDOR,
+    DEFAULT_GUEST_OS,
     apply_build_context_defaults,
 )
 
diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py
index 47d4a6c7..1ce039dc 100644
--- a/tests/unit/test_auth.py
+++ b/tests/unit/test_auth.py
@@ -1,9 +1,10 @@
 """Unit tests for madengine.core.auth module."""
 
 import os
-import pytest
 from unittest.mock import MagicMock, mock_open, patch
 
+import pytest
+
 from madengine.core.auth import load_credentials, login_to_registry
 
 
@@ -134,14 +135,22 @@ def test_missing_registry_key_raises_when_raise_on_failure(self):
         console, rich_console = self._mocks()
         credentials = {"other_registry": {"username": "u", "password": "p"}}
         with pytest.raises(RuntimeError, match="myregistry.io"):
-            login_to_registry("myregistry.io", credentials, console, rich_console, raise_on_failure=True)
+            login_to_registry(
+                "myregistry.io",
+                credentials,
+                console,
+                rich_console,
+                raise_on_failure=True,
+            )
         console.sh.assert_not_called()
 
     def test_missing_registry_key_returns_when_not_raise_on_failure(self):
         """Returns silently when registry key absent and raise_on_failure=False."""
         console, rich_console = self._mocks()
         credentials = {"other_registry": {"username": "u", "password": "p"}}
-        login_to_registry("myregistry.io", credentials, console, rich_console, raise_on_failure=False)
+        login_to_registry(
+            "myregistry.io", credentials, console, rich_console, raise_on_failure=False
+        )
         console.sh.assert_not_called()
 
     def test_invalid_credentials_format_raises(self):
@@ -149,14 +158,18 @@ def test_invalid_credentials_format_raises(self):
         console, rich_console = self._mocks()
         credentials = {"dockerhub": {"token": "abc"}}
         with pytest.raises(RuntimeError, match="username|password"):
-            login_to_registry("docker.io", credentials, console, rich_console, raise_on_failure=True)
+            login_to_registry(
+                "docker.io", credentials, console, rich_console, raise_on_failure=True
+            )
         console.sh.assert_not_called()
 
     def test_invalid_credentials_format_returns_when_not_raise_on_failure(self):
         """Returns silently when credentials format invalid and raise_on_failure=False."""
         console, rich_console = self._mocks()
         credentials = {"dockerhub": {"token": "abc"}}
-        login_to_registry("docker.io", credentials, console, rich_console, raise_on_failure=False)
+        login_to_registry(
+            "docker.io", credentials, console, rich_console, raise_on_failure=False
+        )
         console.sh.assert_not_called()
 
     def test_docker_io_normalised_to_dockerhub(self):
@@ -184,12 +197,16 @@ def test_login_failure_raises_when_raise_on_failure(self):
         console.sh.side_effect = RuntimeError("auth failed")
         credentials = {"dockerhub": {"username": "user", "password": "pass"}}
         with pytest.raises(RuntimeError, match="auth failed"):
-            login_to_registry(None, credentials, console, rich_console, raise_on_failure=True)
+            login_to_registry(
+                None, credentials, console, rich_console, raise_on_failure=True
+            )
 
     def test_login_failure_suppressed_when_not_raise_on_failure(self):
         """docker login error is suppressed when raise_on_failure=False."""
         console, rich_console = self._mocks()
         console.sh.side_effect = RuntimeError("auth failed")
         credentials = {"dockerhub": {"username": "user", "password": "pass"}}
-        login_to_registry(None, credentials, console, rich_console, raise_on_failure=False)
+        login_to_registry(
+            None, credentials, console, rich_console, raise_on_failure=False
+        )
         # Should not propagate the exception
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index 164ac7a5..bbed4a71 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -16,8 +16,8 @@
 import importlib
 import json
 import os
-from io import StringIO
 import tempfile
+from io import StringIO
 from unittest.mock import MagicMock, patch
 
 # third-party modules
@@ -28,34 +28,34 @@
 
 # project modules
 from madengine.cli import (
-    app,
-    setup_logging,
-    create_args_namespace,
-    validate_additional_context,
-    save_summary_with_feedback,
-    display_results_table,
-    ExitCode,
-    VALID_GPU_VENDORS,
-    VALID_GUEST_OS,
+    DEFAULT_DATA_CONFIG,
     DEFAULT_MANIFEST_FILE,
     DEFAULT_PERF_OUTPUT,
-    DEFAULT_DATA_CONFIG,
-    DEFAULT_TOOLS_CONFIG,
     DEFAULT_TIMEOUT,
+    DEFAULT_TOOLS_CONFIG,
+    VALID_GPU_VENDORS,
+    VALID_GUEST_OS,
+    ExitCode,
+    app,
+    create_args_namespace,
+    display_results_table,
+    save_summary_with_feedback,
+    setup_logging,
+    validate_additional_context,
 )
 from tests.fixtures.utils import (
     BASE_DIR,
     MODEL_DIR,
+    generate_additional_context_for_machine,
     has_gpu,
     requires_gpu,
-    generate_additional_context_for_machine,
 )
 
-
 # ============================================================================
 # CLI Utilities Tests
 # ============================================================================
 
+
 class TestSetupLogging:
     """Test the setup_logging function."""
 
@@ -159,8 +159,16 @@ def test_display_results_table_build_shows_gpu_arch_from_docker_builder(self):
         """Multi-arch builds record gpu_architecture; table must show it, not N/A."""
         summary = {
             "successful_builds": [
-                {"model": "dummy", "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx90a", "gpu_architecture": "gfx90a"},
-                {"model": "dummy", "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx942", "gpu_architecture": "gfx942"},
+                {
+                    "model": "dummy",
+                    "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx90a",
+                    "gpu_architecture": "gfx90a",
+                },
+                {
+                    "model": "dummy",
+                    "docker_image": "ci-dummy_dummy.ubuntu.amd_gfx942",
+                    "gpu_architecture": "gfx942",
+                },
             ],
             "failed_builds": [],
         }
@@ -209,6 +217,7 @@ def test_display_results_table_run_results(self):
 # CLI Validation Tests
 # ============================================================================
 
+
 class TestValidateAdditionalContext:
     """Test the validate_additional_context function."""
 
@@ -283,20 +292,20 @@ class TestProcessBatchManifest:
     def test_process_batch_manifest_valid_mixed_build_new(self):
         """Test processing batch manifest with mixed build_new values - core functionality."""
         from madengine.cli.validators import process_batch_manifest
-        
+
         batch_data = [
             {"model_name": "model1", "build_new": True},
             {"model_name": "model2", "build_new": False},
             {"model_name": "model3", "build_new": True},
         ]
-        
+
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(batch_data, f)
             temp_file = f.name
-        
+
         try:
             result = process_batch_manifest(temp_file)
-            
+
             # Only models with build_new=True should be in build_tags
             assert result["build_tags"] == ["model1", "model3"]
             # All models should be in all_tags
@@ -308,19 +317,19 @@ def test_process_batch_manifest_valid_mixed_build_new(self):
     def test_process_batch_manifest_default_build_new_false(self):
         """Test that build_new defaults to false when not specified."""
         from madengine.cli.validators import process_batch_manifest
-        
+
         batch_data = [
             {"model_name": "model1"},  # No build_new field
             {"model_name": "model2", "build_new": True},
         ]
-        
+
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(batch_data, f)
             temp_file = f.name
-        
+
         try:
             result = process_batch_manifest(temp_file)
-            
+
             # model1 should not be in build_tags (defaults to false)
             assert result["build_tags"] == ["model2"]
             assert result["all_tags"] == ["model1", "model2"]
@@ -330,28 +339,24 @@ def test_process_batch_manifest_default_build_new_false(self):
     def test_process_batch_manifest_with_registry_fields(self):
         """Test per-model registry override - key feature."""
         from madengine.cli.validators import process_batch_manifest
-        
+
         batch_data = [
             {
                 "model_name": "model1",
                 "build_new": True,
                 "registry": "docker.io/myorg",
-                "registry_image": "myorg/model1"
-            },
-            {
-                "model_name": "model2",
-                "build_new": True,
-                "registry": "gcr.io/myproject"
+                "registry_image": "myorg/model1",
             },
+            {"model_name": "model2", "build_new": True, "registry": "gcr.io/myproject"},
         ]
-        
+
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(batch_data, f)
             temp_file = f.name
-        
+
         try:
             result = process_batch_manifest(temp_file)
-            
+
             # Verify registry metadata is preserved
             assert result["manifest_data"][0]["registry"] == "docker.io/myorg"
             assert result["manifest_data"][0]["registry_image"] == "myorg/model1"
@@ -362,17 +367,17 @@ def test_process_batch_manifest_with_registry_fields(self):
     def test_process_batch_manifest_error_handling(self):
         """Test error handling for various invalid inputs."""
         from madengine.cli.validators import process_batch_manifest
-        
+
         # File not found
         with pytest.raises(FileNotFoundError) as exc_info:
             process_batch_manifest("non_existent_file.json")
         assert "Batch manifest file not found" in str(exc_info.value)
-        
+
         # Invalid JSON
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             f.write("invalid json content{")
             temp_file = f.name
-        
+
         try:
             with pytest.raises(ValueError) as exc_info:
                 process_batch_manifest(temp_file)
@@ -383,26 +388,26 @@ def test_process_batch_manifest_error_handling(self):
     def test_process_batch_manifest_validation(self):
         """Test validation rules for batch manifest."""
         from madengine.cli.validators import process_batch_manifest
-        
+
         # Not a list
         batch_data = {"model_name": "model1", "build_new": True}
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(batch_data, f)
             temp_file = f.name
-        
+
         try:
             with pytest.raises(ValueError) as exc_info:
                 process_batch_manifest(temp_file)
             assert "must be a list" in str(exc_info.value)
         finally:
             os.unlink(temp_file)
-        
+
         # Missing model_name
         batch_data = [{"build_new": True}]
         with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             json.dump(batch_data, f)
             temp_file = f.name
-        
+
         try:
             with pytest.raises(ValueError) as exc_info:
                 process_batch_manifest(temp_file)
@@ -415,6 +420,7 @@ def test_process_batch_manifest_validation(self):
 # CLI exit code and error handling tests (CI / Jenkins smoke)
 # ============================================================================
 
+
 @pytest.fixture
 def runner() -> CliRunner:
     return CliRunner()
diff --git a/tests/unit/test_config_integration.py b/tests/unit/test_config_integration.py
new file mode 100644
index 00000000..fdf32bd8
--- /dev/null
+++ b/tests/unit/test_config_integration.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""Integration tests for load_config end-to-end pipeline."""
+
+from pathlib import Path
+
+import pytest
+
+from madengine.config import load_config
+from madengine.core.errors import ConfigurationError
+
+FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" / "configs"
+
+
+class TestLoadConfigEndToEnd:
+    def test_defaults_produce_valid_context(self):
+        ctx, meta = load_config([])
+        assert ctx["gpu_vendor"] == "AMD"
+        assert ctx["guest_os"] == "UBUNTU"
+        assert meta["model"]["tags"] == []
+
+    def test_scheduler_slurm(self):
+        ctx, meta = load_config(["scheduler=slurm"])
+        assert "slurm" in ctx
+        assert ctx["slurm"]["partition"] == "amd-rccl"
+
+    def test_launcher_torchrun(self):
+        ctx, meta = load_config(["launcher=torchrun"])
+        assert ctx["distributed"]["enabled"] is True
+        assert ctx["distributed"]["launcher"] == "torchrun"
+
+    def test_combined_overrides(self):
+        ctx, meta = load_config(
+            [
+                "scheduler=slurm",
+                "launcher=torchrun",
+                "distributed.nnodes=4",
+                "+env=nccl_debug",
+            ]
+        )
+        assert ctx["distributed"]["nnodes"] == 4
+        assert ctx["env_vars"]["NCCL_DEBUG"] == "INFO"
+        assert "slurm" in ctx
+
+    def test_user_yaml_file(self):
+        yaml_path = str(FIXTURES_DIR / "test_slurm_job.yaml")
+        ctx, meta = load_config([yaml_path])
+        assert meta["model"]["tags"] == ["dummy"]
+        assert ctx["slurm"]["partition"] == "test-partition"
+        assert ctx["distributed"]["nnodes"] == 2
+        assert ctx["env_vars"]["MY_VAR"] == "test_value"
+        assert ctx["debug"] is True
+
+    def test_user_yaml_with_override(self):
+        # User YAML is merged last (highest priority). Overrides for keys present in
+        # the user YAML are overwritten by it; new keys added via '+' syntax survive.
+        yaml_path = str(FIXTURES_DIR / "test_slurm_job.yaml")
+        ctx, meta = load_config([yaml_path, "+env_vars.EXTRA_VAR=hello"])
+        # MY_VAR from user YAML is preserved
+        assert ctx["env_vars"]["MY_VAR"] == "test_value"
+        # EXTRA_VAR added via '+' override is also present
+        assert ctx["env_vars"]["EXTRA_VAR"] == "hello"
+
+    def test_docker_keys_translated(self):
+        # Appending to an empty dict in Hydra requires the '+' prefix
+        ctx, meta = load_config(["+docker.build_args.KEY=val"])
+        assert ctx["docker_build_arg"]["KEY"] == "val"
+
+    def test_slurm_and_k8s_conflict_raises(self):
+        # scheduler=slurm adds 'slurm' section; +k8s.namespace appends 'k8s' section.
+        # The validator detects both, and load_config raises ConfigurationError.
+        with pytest.raises(ConfigurationError, match="Cannot specify both"):
+            load_config(["scheduler=slurm", "+k8s.namespace=test"])
+
+    def test_unsupported_platform_raises(self):
+        with pytest.raises(ConfigurationError, match="not yet supported"):
+            load_config(["platform=bare_metal"])
+
+    def test_container_image_promoted(self):
+        ctx, meta = load_config(["model.container_image=myimage:latest"])
+        assert ctx["MAD_CONTAINER_IMAGE"] == "myimage:latest"
+
+    def test_model_tags_in_metadata(self):
+        ctx, meta = load_config(["model.tags=[dummy,bert]"])
+        assert meta["model"]["tags"] == ["dummy", "bert"]
+        assert "model" not in ctx
+
+    def test_profile_append(self):
+        ctx, meta = load_config(["+profile=mi300x_8gpu"])
+        assert ctx["gpu_type"] == "mi300x"
+        assert ctx["env_vars"]["HSA_ENABLE_SDMA"] == "0"
+
+    def test_tools_append(self):
+        ctx, meta = load_config(["+tools=rocprofv3_lightweight"])
+        assert len(ctx["tools"]) == 1
+        assert ctx["tools"][0]["name"] == "rocprofv3_lightweight"
diff --git a/tests/unit/test_config_loader.py b/tests/unit/test_config_loader.py
index 65184e68..5ca27829 100644
--- a/tests/unit/test_config_loader.py
+++ b/tests/unit/test_config_loader.py
@@ -14,9 +14,9 @@
 """
 
 import json
-import pytest
 from pathlib import Path
 
+import pytest
 from jinja2 import Template
 
 from madengine.deployment.config_loader import ConfigLoader, apply_deployment_config
@@ -42,24 +42,20 @@ def load_config_file(relative_path):
     full_path = get_project_root() / relative_path
     if not full_path.exists():
         pytest.skip(f"Config file not found: {relative_path}")
-    
+
     with open(full_path) as f:
         return json.load(f)
 
 
 class TestConfigLoaderBasics:
     """Test basic ConfigLoader functionality."""
-    
+
     def test_minimal_single_gpu(self):
         """Test minimal single GPU config gets proper defaults."""
-        user_config = {
-            "k8s": {
-                "gpu_count": 1
-            }
-        }
-        
+        user_config = {"k8s": {"gpu_count": 1}}
+
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate defaults applied
         assert result["k8s"]["gpu_count"] == 1
         assert result["k8s"]["memory"] == "16Gi"
@@ -67,22 +63,16 @@ def test_minimal_single_gpu(self):
         assert result["k8s"]["namespace"] == "default"
         assert result["gpu_vendor"] == "AMD"
         assert "OMP_NUM_THREADS" in result["env_vars"]
-    
+
     def test_minimal_multi_gpu(self):
         """Test minimal multi-GPU config gets proper defaults."""
         user_config = {
-            "k8s": {
-                "gpu_count": 2
-            },
-            "distributed": {
-                "launcher": "torchrun",
-                "nnodes": 1,
-                "nproc_per_node": 2
-            }
+            "k8s": {"gpu_count": 2},
+            "distributed": {"launcher": "torchrun", "nnodes": 1, "nproc_per_node": 2},
         }
-        
+
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate multi-GPU defaults
         assert result["k8s"]["gpu_count"] == 2
         assert result["k8s"]["memory"] == "64Gi"
@@ -91,63 +81,49 @@ def test_minimal_multi_gpu(self):
         assert result["env_vars"]["NCCL_DEBUG"] == "WARN"
         assert "MIOPEN_FIND_MODE" in result["env_vars"]
         assert result["distributed"]["backend"] == "nccl"
-    
+
     def test_minimal_multi_node(self):
         """Test minimal multi-node config gets proper defaults."""
         user_config = {
-            "k8s": {
-                "gpu_count": 2
-            },
-            "distributed": {
-                "launcher": "torchrun",
-                "nnodes": 2,
-                "nproc_per_node": 2
-            }
+            "k8s": {"gpu_count": 2},
+            "distributed": {"launcher": "torchrun", "nnodes": 2, "nproc_per_node": 2},
         }
-        
+
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate multi-node defaults
         assert result["k8s"]["host_ipc"] == True
         assert "NCCL_DEBUG_SUBSYS" in result["env_vars"]
         assert "NCCL_TIMEOUT" in result["env_vars"]
-    
+
     def test_nvidia_config(self):
         """Test NVIDIA GPU config gets proper defaults."""
         user_config = {
             "gpu_vendor": "NVIDIA",
-            "k8s": {
-                "gpu_count": 4
-            },
-            "distributed": {
-                "launcher": "torchrun",
-                "nnodes": 1,
-                "nproc_per_node": 4
-            }
+            "k8s": {"gpu_count": 4},
+            "distributed": {"launcher": "torchrun", "nnodes": 1, "nproc_per_node": 4},
         }
-        
+
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate NVIDIA defaults
         assert result["k8s"]["gpu_resource_name"] == "nvidia.com/gpu"
         assert "NCCL_P2P_DISABLE" in result["env_vars"]
         assert result["env_vars"]["OMP_NUM_THREADS"] == "12"
-    
+
     def test_override_behavior(self):
         """Test that user overrides work correctly."""
         user_config = {
             "k8s": {
                 "gpu_count": 1,
                 "namespace": "custom-namespace",
-                "memory": "32Gi"  # Override default 16Gi
+                "memory": "32Gi",  # Override default 16Gi
             },
-            "env_vars": {
-                "CUSTOM_VAR": "custom_value"
-            }
+            "env_vars": {"CUSTOM_VAR": "custom_value"},
         }
-        
+
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate overrides
         assert result["k8s"]["namespace"] == "custom-namespace"
         assert result["k8s"]["memory"] == "32Gi"  # Overridden
@@ -161,8 +137,10 @@ class TestApplyDeploymentConfig:
 
     def test_apply_slurm_config_mutates_and_returns(self):
         """apply_deployment_config mutates config.additional_context and returns full config."""
+
         class FakeConfig:
             additional_context = {"slurm": {"nodes": 2}}
+
         config = FakeConfig()
         result = apply_deployment_config(config, ConfigLoader.load_slurm_config)
         assert result is config.additional_context
@@ -171,8 +149,10 @@ class FakeConfig:
 
     def test_apply_k8s_config_mutates_and_returns(self):
         """apply_deployment_config with load_k8s_config mutates and returns full config."""
+
         class FakeConfig:
             additional_context = {"k8s": {"gpu_count": 1}}
+
         config = FakeConfig()
         result = apply_deployment_config(config, ConfigLoader.load_k8s_config)
         assert result is config.additional_context
@@ -182,31 +162,39 @@ class FakeConfig:
 
 class TestConfigLoaderK8sConfigs:
     """Test with actual K8s config files (if they exist)."""
-    
+
     @pytest.mark.skipif(
-        not config_exists("examples/k8s-configs/basic/01-native-single-node-single-gpu.json"),
-        reason="K8s config file not found"
+        not config_exists(
+            "examples/k8s-configs/basic/01-native-single-node-single-gpu.json"
+        ),
+        reason="K8s config file not found",
     )
     def test_k8s_single_gpu_config(self):
         """Test K8s single GPU config file."""
-        user_config = load_config_file("examples/k8s-configs/basic/01-native-single-node-single-gpu.json")
+        user_config = load_config_file(
+            "examples/k8s-configs/basic/01-native-single-node-single-gpu.json"
+        )
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate key fields are preserved
         assert result["k8s"]["gpu_count"] == 1
         assert "memory" in result["k8s"]
         assert "namespace" in result["k8s"]
         assert result["gpu_vendor"] in ["AMD", "NVIDIA"]
-    
+
     @pytest.mark.skipif(
-        not config_exists("examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json"),
-        reason="K8s multi-GPU config file not found"
+        not config_exists(
+            "examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json"
+        ),
+        reason="K8s multi-GPU config file not found",
     )
     def test_k8s_multi_gpu_config(self):
         """Test K8s multi-GPU config file."""
-        user_config = load_config_file("examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json")
+        user_config = load_config_file(
+            "examples/k8s-configs/basic/02-torchrun-single-node-multi-gpu.json"
+        )
         result = ConfigLoader.load_k8s_config(user_config)
-        
+
         # Validate multi-GPU config
         assert result["k8s"]["gpu_count"] >= 2
         assert "distributed" in result
@@ -216,36 +204,42 @@ def test_k8s_multi_gpu_config(self):
 
 class TestConfigLoaderSlurmConfigs:
     """Test with actual SLURM config files (if they exist)."""
-    
+
     @pytest.mark.skipif(
-        not config_exists("examples/slurm-configs/basic/01-single-node-single-gpu.json"),
-        reason="SLURM config file not found"
+        not config_exists(
+            "examples/slurm-configs/basic/01-single-node-single-gpu.json"
+        ),
+        reason="SLURM config file not found",
     )
     def test_slurm_single_gpu_config(self):
         """Test SLURM single GPU config file."""
-        user_config = load_config_file("examples/slurm-configs/basic/01-single-node-single-gpu.json")
+        user_config = load_config_file(
+            "examples/slurm-configs/basic/01-single-node-single-gpu.json"
+        )
         result = ConfigLoader.load_slurm_config(user_config)
-        
+
         # Validate SLURM config structure
         assert "slurm" in result
         assert result["slurm"]["nodes"] == 1
         assert result["slurm"]["gpus_per_node"] >= 1
-    
+
     @pytest.mark.skipif(
         not config_exists("examples/slurm-configs/basic/06-vllm-multi-node.json"),
-        reason="SLURM vLLM multi-node config file not found"
+        reason="SLURM vLLM multi-node config file not found",
     )
     def test_slurm_vllm_multi_node_config(self):
         """Test SLURM vLLM multi-node config file."""
-        user_config = load_config_file("examples/slurm-configs/basic/06-vllm-multi-node.json")
+        user_config = load_config_file(
+            "examples/slurm-configs/basic/06-vllm-multi-node.json"
+        )
         result = ConfigLoader.load_slurm_config(user_config)
-        
+
         # Validate multi-node vLLM config
         assert "slurm" in result
         assert result["slurm"]["nodes"] >= 2
         assert result["slurm"]["gpus_per_node"] >= 1
         assert "distributed" in result
-        
+
         # Check for new preflight node check parameters
         if "enable_node_check" in result["slurm"]:
             assert isinstance(result["slurm"]["enable_node_check"], bool)
@@ -290,88 +284,66 @@ def test_job_script_includes_nodelist_when_set(self):
 
 class TestConfigLoaderDeploymentType:
     """Test deployment type inference and validation."""
-    
+
     def test_auto_infer_k8s(self):
         """Test k8s deployment type is auto-inferred from k8s field presence."""
-        user_config = {
-            "k8s": {
-                "gpu_count": 1
-            }
-        }
-        
+        user_config = {"k8s": {"gpu_count": 1}}
+
         result = ConfigLoader.load_config(user_config)
-        
+
         # Validate k8s config was loaded and defaults applied
         assert "k8s" in result
         assert result["k8s"]["gpu_count"] == 1
         assert "memory" in result["k8s"]  # Default was applied
-    
+
     def test_auto_infer_slurm(self):
         """Test slurm deployment type is auto-inferred from slurm field presence."""
-        user_config = {
-            "slurm": {
-                "nodes": 1,
-                "gpus_per_node": 4
-            }
-        }
-        
+        user_config = {"slurm": {"nodes": 1, "gpus_per_node": 4}}
+
         result = ConfigLoader.load_config(user_config)
-        
+
         # Validate slurm config was loaded and defaults applied
         assert "slurm" in result
         assert result["slurm"]["nodes"] == 1
         assert result["slurm"]["gpus_per_node"] == 4
-    
+
     def test_auto_infer_local(self):
         """Test local deployment when no k8s/slurm present."""
-        user_config = {
-            "env_vars": {"MY_VAR": "value"}
-        }
-        
+        user_config = {"env_vars": {"MY_VAR": "value"}}
+
         result = ConfigLoader.load_config(user_config)
-        
+
         # Validate local config (no k8s or slurm fields)
         assert "k8s" not in result or result.get("k8s") == {}
         assert "slurm" not in result or result.get("slurm") == {}
         assert result["env_vars"]["MY_VAR"] == "value"
-    
+
     def test_conflict_k8s_and_slurm(self):
         """Test error when both k8s and slurm fields present."""
-        user_config = {
-            "k8s": {"gpu_count": 1},
-            "slurm": {"nodes": 2}
-        }
-        
+        user_config = {"k8s": {"gpu_count": 1}, "slurm": {"nodes": 2}}
+
         with pytest.raises(ValueError, match="Both 'k8s' and 'slurm'"):
             ConfigLoader.load_config(user_config)
-    
+
     def test_conflict_explicit_deploy_mismatch(self):
         """Test error when explicit deploy field conflicts with config presence."""
-        user_config = {
-            "deploy": "slurm",
-            "k8s": {"gpu_count": 1}
-        }
-        
+        user_config = {"deploy": "slurm", "k8s": {"gpu_count": 1}}
+
         with pytest.raises(ValueError, match="Conflicting deployment"):
             ConfigLoader.load_config(user_config)
-    
+
     def test_explicit_deploy_matching(self):
         """Test that explicit deploy field works when it matches config."""
-        user_config = {
-            "deploy": "k8s",
-            "k8s": {"gpu_count": 1}
-        }
-        
+        user_config = {"deploy": "k8s", "k8s": {"gpu_count": 1}}
+
         result = ConfigLoader.load_config(user_config)
-        
+
         # Should work fine since deploy matches k8s presence
         # The deploy field may or may not be preserved in result
         assert result["k8s"]["gpu_count"] == 1
         assert "memory" in result["k8s"]  # Defaults applied
 
 
-
 # Run pytest if executed directly
 if __name__ == "__main__":
     pytest.main([__file__, "-v", "-s"])
-
diff --git a/tests/unit/test_config_schema.py b/tests/unit/test_config_schema.py
new file mode 100644
index 00000000..ab6ad103
--- /dev/null
+++ b/tests/unit/test_config_schema.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Tests for ConfigValidator."""
+
+from omegaconf import DictConfig, OmegaConf
+
+from madengine.config.schema import ConfigValidator
+
+
+def make_cfg(data: dict) -> DictConfig:
+    return OmegaConf.create(data)
+
+
+class TestConflictDetection:
+    def test_slurm_and_k8s_conflict(self):
+        cfg = make_cfg({"slurm": {"partition": "gpu"}, "k8s": {"namespace": "default"}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("Cannot specify both" in e for e in errors)
+
+    def test_slurm_only_no_conflict(self):
+        cfg = make_cfg({"slurm": {"partition": "gpu"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Cannot specify both" in e for e in errors)
+
+    def test_k8s_only_no_conflict(self):
+        cfg = make_cfg({"k8s": {"namespace": "default"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Cannot specify both" in e for e in errors)
+
+
+class TestDistributedValidation:
+    def test_enabled_without_launcher(self):
+        cfg = make_cfg({"distributed": {"enabled": True}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("requires distributed.launcher" in e for e in errors)
+
+    def test_enabled_with_launcher(self):
+        cfg = make_cfg({"distributed": {"enabled": True, "launcher": "torchrun"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("requires distributed.launcher" in e for e in errors)
+
+    def test_invalid_nnodes(self):
+        cfg = make_cfg(
+            {"distributed": {"enabled": True, "launcher": "torchrun", "nnodes": -1}}
+        )
+        errors = ConfigValidator.validate(cfg)
+        assert any("positive integer" in e for e in errors)
+
+    def test_valid_nnodes(self):
+        cfg = make_cfg(
+            {"distributed": {"enabled": True, "launcher": "torchrun", "nnodes": 4}}
+        )
+        errors = ConfigValidator.validate(cfg)
+        assert not any("positive integer" in e for e in errors)
+
+
+class TestUnknownKeys:
+    def test_unknown_top_level_key(self):
+        cfg = make_cfg({"gpu_vendor": "AMD", "typo_key": "oops"})
+        errors = ConfigValidator.validate(cfg)
+        assert any("Unknown config key: 'typo_key'" in e for e in errors)
+
+    def test_known_keys_accepted(self):
+        cfg = make_cfg({"gpu_vendor": "AMD", "debug": True, "env_vars": {}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("Unknown config key" in e for e in errors)
+
+
+class TestPlatformValidation:
+    def test_unsupported_platform(self):
+        cfg = make_cfg({"platform": {"type": "bare_metal"}})
+        errors = ConfigValidator.validate(cfg)
+        assert any("not yet supported" in e for e in errors)
+
+    def test_docker_platform_ok(self):
+        cfg = make_cfg({"platform": {"type": "docker"}})
+        errors = ConfigValidator.validate(cfg)
+        assert not any("not yet supported" in e for e in errors)
diff --git a/tests/unit/test_config_translator.py b/tests/unit/test_config_translator.py
new file mode 100644
index 00000000..7d73fc87
--- /dev/null
+++ b/tests/unit/test_config_translator.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+"""Tests for ConfigTranslator."""
+
+from omegaconf import DictConfig, OmegaConf
+
+from madengine.config.translator import ConfigTranslator
+
+
+def make_cfg(overrides: dict) -> DictConfig:
+    """Build a DictConfig by deep-merging overrides onto the default config."""
+    base = {
+        "model": {
+            "tags": [],
+            "manifest_file": None,
+            "container_image": None,
+            "skip_run": False,
+            "timeout": None,
+        },
+        "docker": {
+            "build_args": {},
+            "env_vars": {},
+            "mounts": {},
+            "gpus": None,
+            "cpus": None,
+            "additional_run_options": None,
+            "keep_alive": False,
+            "clean_cache": False,
+        },
+        "build": {
+            "registry": None,
+            "target_archs": [],
+            "manifest_output": "build_manifest.json",
+        },
+        "env_vars": {},
+        "debug": False,
+        "live_output": False,
+        "log_error": {"pattern_scan": True, "benign_patterns": [], "patterns": []},
+        "tools": [],
+        "pre_scripts": [],
+        "post_scripts": [],
+        "encapsulate_script": None,
+        "data_config": "data.json",
+        "output": "perf.csv",
+        "summary_output": None,
+        "gpu_vendor": "AMD",
+        "guest_os": "UBUNTU",
+        "runtime": {
+            "devices": [],
+            "capabilities": [],
+            "security_opts": [],
+            "network_mode": "host",
+            "ipc": "host",
+            "groups": [],
+            "use_gpu_flag": False,
+        },
+        "platform": {"type": "docker"},
+    }
+    # Deep merge: a nested override replaces only the keys it names.
+    return OmegaConf.merge(OmegaConf.create(base), OmegaConf.create(overrides))
+
+
+class TestDockerKeyMapping:
+    def test_build_args_mapped(self):
+        """docker.build_args must surface as ctx["docker_build_arg"]."""
+        build_args = {"KEY": "val"}
+        docker = {
+            "build_args": build_args,
+            "env_vars": {},
+            "mounts": {},
+            "gpus": None,
+            "cpus": None,
+            "additional_run_options": None,
+            "keep_alive": False,
+            "clean_cache": False,
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"docker": docker})
+        )
+        assert translated["docker_build_arg"] == build_args
+
+    def test_env_vars_mapped(self):
+        """docker.env_vars must surface as ctx["docker_env_vars"]."""
+        env_vars = {"A": "1"}
+        docker = {
+            "build_args": {},
+            "env_vars": env_vars,
+            "mounts": {},
+            "gpus": None,
+            "cpus": None,
+            "additional_run_options": None,
+            "keep_alive": False,
+            "clean_cache": False,
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"docker": docker})
+        )
+        assert translated["docker_env_vars"] == env_vars
+
+    def test_null_gpus_excluded(self):
+        """With docker.gpus left as None, ctx has no "docker_gpus" key."""
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg({}))
+        assert "docker_gpus" not in translated
+
+    def test_non_null_gpus_included(self):
+        """A non-None docker.gpus value must surface as ctx["docker_gpus"]."""
+        gpus = "0-3"
+        docker = {
+            "build_args": {},
+            "env_vars": {},
+            "mounts": {},
+            "gpus": gpus,
+            "cpus": None,
+            "additional_run_options": None,
+            "keep_alive": False,
+            "clean_cache": False,
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"docker": docker})
+        )
+        assert translated["docker_gpus"] == gpus
+
+
+class TestLogErrorMapping:
+    def test_pattern_scan_mapped(self):
+        """log_error.pattern_scan=False becomes ctx["log_error_pattern_scan"]."""
+        log_error = {
+            "pattern_scan": False,
+            "benign_patterns": [],
+            "patterns": [],
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"log_error": log_error})
+        )
+        flag = translated["log_error_pattern_scan"]
+        assert flag is False
+
+    def test_patterns_mapped(self):
+        """Benign and error pattern lists map to flat log_error_* keys."""
+        log_error = {
+            "pattern_scan": True,
+            "benign_patterns": ["OK"],
+            "patterns": ["ERR"],
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"log_error": log_error})
+        )
+        # Each nested list is expected under its own flattened key.
+        assert translated["log_error_benign_patterns"] == ["OK"]
+        assert translated["log_error_patterns"] == ["ERR"]
+
+
+class TestPassthroughKeys:
+    def test_gpu_vendor_passthrough(self):
+        overrides = {"gpu_vendor": "NVIDIA"}
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg(overrides))
+        assert translated["gpu_vendor"] == "NVIDIA"
+
+    def test_env_vars_passthrough(self):
+        overrides = {"env_vars": {"MY": "VAR"}}
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg(overrides))
+        assert translated["env_vars"] == {"MY": "VAR"}
+
+    def test_slurm_passthrough(self):
+        overrides = {"slurm": {"partition": "gpu"}}
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg(overrides))
+        assert translated["slurm"] == {"partition": "gpu"}
+
+    def test_distributed_passthrough(self):
+        overrides = {"distributed": {"enabled": True, "launcher": "torchrun"}}
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg(overrides))
+        assert translated["distributed"]["launcher"] == "torchrun"
+
+    def test_tools_passthrough(self):
+        overrides = {"tools": [{"name": "rpd"}]}
+        translated, _ = ConfigTranslator.to_additional_context(make_cfg(overrides))
+        assert translated["tools"] == [{"name": "rpd"}]
+
+
+class TestExtractedKeys:
+    def test_model_extracted(self):
+        """The model section lands in the second return value, not in ctx."""
+        model = {
+            "tags": ["dummy"],
+            "manifest_file": None,
+            "container_image": None,
+            "skip_run": False,
+            "timeout": 300,
+        }
+        translated, extracted = ConfigTranslator.to_additional_context(
+            make_cfg({"model": model})
+        )
+        assert "model" not in translated
+        model_meta = extracted["model"]
+        assert model_meta["tags"] == ["dummy"]
+        assert model_meta["timeout"] == 300
+
+    def test_build_extracted(self):
+        """The build section lands in the second return value, not in ctx."""
+        build = {
+            "registry": "myregistry.io",
+            "target_archs": ["gfx942"],
+            "manifest_output": "build_manifest.json",
+        }
+        translated, extracted = ConfigTranslator.to_additional_context(
+            make_cfg({"build": build})
+        )
+        assert "build" not in translated
+        build_meta = extracted["build"]
+        assert build_meta["registry"] == "myregistry.io"
+
+    def test_platform_extracted(self):
+        """The platform section lands in the second return value, not in ctx."""
+        translated, extracted = ConfigTranslator.to_additional_context(make_cfg({}))
+        assert "platform" not in translated
+        assert extracted["platform"]["type"] == "docker"
+
+    def test_container_image_promoted(self):
+        """model.container_image is promoted to ctx["MAD_CONTAINER_IMAGE"]."""
+        model = {
+            "tags": [],
+            "manifest_file": None,
+            "container_image": "myimage:latest",
+            "skip_run": False,
+            "timeout": None,
+        }
+        translated, _ = ConfigTranslator.to_additional_context(
+            make_cfg({"model": model})
+        )
+        promoted = translated["MAD_CONTAINER_IMAGE"]
+        assert promoted == "myimage:latest"
+
+    def test_runtime_extracted(self):
+        """The runtime section lands in the second return value, not in ctx."""
+        translated, extracted = ConfigTranslator.to_additional_context(make_cfg({}))
+        assert "runtime" not in translated
+        assert "runtime" in extracted
diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py
index 248c8ac9..12d724a3 100644
--- a/tests/unit/test_constants.py
+++ b/tests/unit/test_constants.py
@@ -5,15 +5,15 @@
 from unittest.mock import patch
 
 from madengine.core.constants import (
-    NAS_NODES,
+    _DEFAULT_MAD_AWS_S3,
+    _DEFAULT_MAD_MINIO,
+    _DEFAULT_NAS_NODES,
+    _DEFAULT_PUBLIC_GITHUB_ROCM_KEY,
     MAD_AWS_S3,
     MAD_MINIO,
+    NAS_NODES,
     PUBLIC_GITHUB_ROCM_KEY,
     _get_env_or_creds_or_default,
-    _DEFAULT_NAS_NODES,
-    _DEFAULT_MAD_AWS_S3,
-    _DEFAULT_MAD_MINIO,
-    _DEFAULT_PUBLIC_GITHUB_ROCM_KEY,
 )
 
 
@@ -25,6 +25,7 @@ def test_env_override_returns_parsed_json(self):
         with patch.dict(os.environ, {"TEST_KEY": '[{"a": 1}]'}, clear=False):
             # Need to pass creds - we patch CREDS via the module
             import madengine.core.constants as constants_module
+
             with patch.object(constants_module, "CREDS", {}):
                 result = _get_env_or_creds_or_default(
                     "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES
@@ -35,6 +36,7 @@ def test_env_invalid_json_returns_default(self):
         """When env is set with invalid JSON, default is returned."""
         with patch.dict(os.environ, {"TEST_KEY": "not json"}, clear=False):
             import madengine.core.constants as constants_module
+
             with patch.object(constants_module, "CREDS", {}):
                 result = _get_env_or_creds_or_default(
                     "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES
@@ -51,6 +53,7 @@ def test_creds_fallback_when_env_unset(self):
             except Exception:
                 pass
         import madengine.core.constants as constants_module
+
         with patch.object(constants_module, "CREDS", {"TEST_KEY": [{"from": "creds"}]}):
             result = _get_env_or_creds_or_default(
                 "TEST_KEY", "TEST_KEY", _DEFAULT_NAS_NODES
@@ -60,6 +63,7 @@ def test_creds_fallback_when_env_unset(self):
     def test_default_when_env_and_creds_unset(self):
         """When env unset and creds missing key, default is returned."""
         import madengine.core.constants as constants_module
+
         with patch.dict(os.environ, {}, clear=False):
             if "TEST_KEY" in os.environ:
                 del os.environ["TEST_KEY"]
@@ -98,6 +102,7 @@ def test_mad_minio_has_expected_keys(self):
     def test_public_github_rocm_key_has_expected_keys(self):
         """PUBLIC_GITHUB_ROCM_KEY has username and token (no value assert to avoid leaking secrets)."""
         assert isinstance(PUBLIC_GITHUB_ROCM_KEY, dict)
-        assert set(PUBLIC_GITHUB_ROCM_KEY.keys()) >= {"username", "token"}, (
-            "PUBLIC_GITHUB_ROCM_KEY must have at least keys 'username' and 'token'"
-        )
+        assert set(PUBLIC_GITHUB_ROCM_KEY.keys()) >= {
+            "username",
+            "token",
+        }, "PUBLIC_GITHUB_ROCM_KEY must have at least keys 'username' and 'token'"
diff --git a/tests/unit/test_container_runner.py b/tests/unit/test_container_runner.py
index 2c10cbe7..8b0cbec0 100644
--- a/tests/unit/test_container_runner.py
+++ b/tests/unit/test_container_runner.py
@@ -12,7 +12,6 @@
 from madengine.deployment.base import PERFORMANCE_LOG_PATTERN
 from madengine.execution.container_runner import ContainerRunner
 
-
 PERF_PATTERN = PERFORMANCE_LOG_PATTERN
 
 
@@ -26,19 +25,34 @@ def _match(self, log_line):
     # --- formats that were already handled before the regex change ---
 
     def test_basic_integer(self):
-        assert self._match("performance: 12345 samples_per_second") == ("12345", "samples_per_second")
+        assert self._match("performance: 12345 samples_per_second") == (
+            "12345",
+            "samples_per_second",
+        )
 
     def test_decimal(self):
-        assert self._match("performance: 100.5 samples_per_second") == ("100.5", "samples_per_second")
+        assert self._match("performance: 100.5 samples_per_second") == (
+            "100.5",
+            "samples_per_second",
+        )
 
     def test_scientific_lowercase_e(self):
-        assert self._match("performance: 1.23e+4 samples_per_second") == ("1.23e+4", "samples_per_second")
+        assert self._match("performance: 1.23e+4 samples_per_second") == (
+            "1.23e+4",
+            "samples_per_second",
+        )
 
     def test_scientific_negative_exponent(self):
-        assert self._match("performance: 1.23e-4 samples_per_second") == ("1.23e-4", "samples_per_second")
+        assert self._match("performance: 1.23e-4 samples_per_second") == (
+            "1.23e-4",
+            "samples_per_second",
+        )
 
     def test_zero(self):
-        assert self._match("performance: 0 samples_per_second") == ("0", "samples_per_second")
+        assert self._match("performance: 0 samples_per_second") == (
+            "0",
+            "samples_per_second",
+        )
 
     def test_metric_with_digits(self):
         assert self._match("performance: 123 metric123") == ("123", "metric123")
@@ -50,15 +64,24 @@ def test_metric_starting_with_underscore(self):
 
     def test_unit_suffix_slash_s(self):
         """Value followed by /s unit suffix: suffix is stripped, metric parsed correctly."""
-        assert self._match("performance: 14164/s samples_per_second") == ("14164", "samples_per_second")
+        assert self._match("performance: 14164/s samples_per_second") == (
+            "14164",
+            "samples_per_second",
+        )
 
     def test_unit_suffix_and_comma(self):
         """Value with /s suffix and comma separator."""
-        assert self._match("performance: 14164.5/s, samples_per_second") == ("14164.5", "samples_per_second")
+        assert self._match("performance: 14164.5/s, samples_per_second") == (
+            "14164.5",
+            "samples_per_second",
+        )
 
     def test_comma_separator_no_suffix(self):
         """Comma after value without a unit suffix."""
-        assert self._match("performance: 100.5, samples_per_second") == ("100.5", "samples_per_second")
+        assert self._match("performance: 100.5, samples_per_second") == (
+            "100.5",
+            "samples_per_second",
+        )
 
     def test_comma_before_suffix(self):
         """Comma immediately before /s suffix: 123,/s metric."""
@@ -72,25 +95,40 @@ def test_comma_space_before_suffix(self):
 
     def test_scientific_uppercase_e(self):
         """Uppercase E in scientific notation (v1 supported, old v2 broke on this)."""
-        assert self._match("performance: 1.23E+4 samples_per_second") == ("1.23E+4", "samples_per_second")
+        assert self._match("performance: 1.23E+4 samples_per_second") == (
+            "1.23E+4",
+            "samples_per_second",
+        )
 
     def test_positive_sign(self):
         """Explicitly signed positive value (v1 supported via [+|-]? prefix)."""
-        assert self._match("performance: +123.45 samples_per_second") == ("+123.45", "samples_per_second")
+        assert self._match("performance: +123.45 samples_per_second") == (
+            "+123.45",
+            "samples_per_second",
+        )
 
     def test_negative_sign(self):
         """Signed negative value (v1 supported)."""
-        assert self._match("performance: -123.45 samples_per_second") == ("-123.45", "samples_per_second")
+        assert self._match("performance: -123.45 samples_per_second") == (
+            "-123.45",
+            "samples_per_second",
+        )
 
     def test_leading_dot_decimal(self):
         """Leading-dot decimal without integer part (v1 supported via [0-9]*[.]?[0-9]*)."""
-        assert self._match("performance: .5 samples_per_second") == (".5", "samples_per_second")
+        assert self._match("performance: .5 samples_per_second") == (
+            ".5",
+            "samples_per_second",
+        )
 
     # --- slash-containing metric names (e.g. samples/sec, tokens/sec) ---
 
     def test_metric_samples_per_sec_slash(self):
         """samples/sec metric (used by _determine_aggregation_method) is parsed."""
-        assert self._match("performance: 1234.5 samples/sec") == ("1234.5", "samples/sec")
+        assert self._match("performance: 1234.5 samples/sec") == (
+            "1234.5",
+            "samples/sec",
+        )
 
     def test_metric_tokens_per_sec_slash(self):
         """tokens/sec metric (used by _determine_aggregation_method) is parsed."""
@@ -137,7 +175,9 @@ class TestCreateSetupFailurePerfEntry:
     def test_returns_dict_with_status_failure(self):
         """Entry has status FAILURE and model name."""
         runner = ContainerRunner(context=MagicMock(), console=MagicMock())
-        runner.context.ctx = {"docker_env_vars": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a"}}
+        runner.context.ctx = {
+            "docker_env_vars": {"MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a"}
+        }
 
         model_info = {"name": "org/model1", "tags": "v1", "n_gpus": "2"}
         build_info = {"dockerfile": "Dockerfile", "docker_image": "img:latest"}
@@ -195,9 +235,16 @@ def test_setup_failure_appends_to_failed_runs_and_records_to_csv(
                 )
 
             manifest = {
-                "built_images": {"img1": {"docker_image": "local/img1", "dockerfile": "D"}},
+                "built_images": {
+                    "img1": {"docker_image": "local/img1", "dockerfile": "D"}
+                },
                 "built_models": {
-                    "img1": {"name": "test/model", "tags": "t1", "n_gpus": "1", "args": ""}
+                    "img1": {
+                        "name": "test/model",
+                        "tags": "t1",
+                        "n_gpus": "1",
+                        "args": "",
+                    }
                 },
             }
             with open(manifest_path, "w") as f:
diff --git a/tests/unit/test_container_runner_helpers.py b/tests/unit/test_container_runner_helpers.py
index a4d96539..2d862a69 100644
--- a/tests/unit/test_container_runner_helpers.py
+++ b/tests/unit/test_container_runner_helpers.py
@@ -110,10 +110,7 @@ def test_excludes_grep_meta_line(self):
         assert log_text_has_error_pattern(log, "RuntimeError:", [], ())
 
     def test_regex_benign_excludes_rocprof_style_line(self):
-        log = (
-            "E12345678  generateRocpd.cpp: noise\n"
-            "clean RuntimeError: real issue\n"
-        )
+        log = "E12345678  generateRocpd.cpp: noise\n" "clean RuntimeError: real issue\n"
         assert log_text_has_error_pattern(
             log,
             "RuntimeError:",
diff --git a/tests/unit/test_context_logic.py b/tests/unit/test_context_logic.py
index 17d1de5d..28ebfa85 100644
--- a/tests/unit/test_context_logic.py
+++ b/tests/unit/test_context_logic.py
@@ -6,8 +6,9 @@
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
+from unittest.mock import MagicMock, Mock, patch
+
 import pytest
-from unittest.mock import Mock, MagicMock, patch
 
 from madengine.core.context import Context
 from madengine.utils.gpu_validator import GPUVendor
@@ -16,7 +17,7 @@
 @pytest.mark.unit
 class TestContextInitialization:
     """Test Context object initialization."""
-    
+
     @patch.object(Context, "get_gpu_renderD_nodes", return_value=None)
     @patch.object(Context, "get_docker_gpus", return_value="0")
     @patch.object(Context, "get_system_gpu_product_name", return_value="Test GPU")
@@ -36,11 +37,11 @@ def test_context_initializes_with_defaults(
     ):
         """Context should initialize with system defaults."""
         context = Context()
-        
+
         assert context.get_gpu_vendor() == "AMD"
         assert context.get_system_ngpus() == 1
         assert context.get_system_gpu_architecture() == "gfx90a"
-    
+
     # REMOVED: test_context_detects_nvidia_gpus and test_context_handles_cpu_only
     # These tests require actual GPU detection and are better suited as integration tests.
     # Context initialization tests are covered in integration/test_platform_integration.py
@@ -49,7 +50,7 @@ def test_context_initializes_with_defaults(
 @pytest.mark.unit
 class TestBuildArgGeneration:
     """Test Docker build argument generation logic."""
-    
+
     @patch.object(Context, "get_gpu_renderD_nodes", return_value=None)
     @patch.object(Context, "get_docker_gpus", return_value="0")
     @patch.object(Context, "get_system_gpu_product_name", return_value="Test GPU")
@@ -72,12 +73,14 @@ def test_generates_build_args_for_amd(
         context.ctx = {
             "docker_build_arg": {
                 "MAD_GPU_VENDOR": "AMD",
-                "MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a"
+                "MAD_SYSTEM_GPU_ARCHITECTURE": "gfx90a",
             }
         }
-        
+
         assert context.ctx["docker_build_arg"]["MAD_GPU_VENDOR"] == "AMD"
-        assert context.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] == "gfx90a"
+        assert (
+            context.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] == "gfx90a"
+        )
 
 
 @pytest.mark.unit
@@ -101,9 +104,9 @@ def _make_build_only_ctx(additional_context="{}") -> Context:
     Returns a fully constructed Context whose ctx dict is populated from additional_context
     but whose init_build_context has NOT yet run, so callers can invoke it in a controlled way.
     """
-    with patch.object(Context, "init_build_context"), \
-         patch.object(Context, "get_ctx_test", return_value="test"), \
-         patch.object(Context, "get_host_os", return_value="linux"):
+    with patch.object(Context, "init_build_context"), patch.object(
+        Context, "get_ctx_test", return_value="test"
+    ), patch.object(Context, "get_host_os", return_value="linux"):
         ctx = Context(additional_context=additional_context, build_only_mode=True)
     return ctx
 
@@ -122,11 +125,19 @@ def test_auto_detect_injects_arch_when_absent(self):
         # get_gpu_tool_manager is a module-level import in context.py; patch it there.
         # detect_gpu_vendor / normalize_architecture_name are imported locally inside
         # init_build_context, so patch them at their source modules.
-        with patch("madengine.core.context.get_gpu_tool_manager", return_value=manager), \
-             patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.AMD), \
-             patch("madengine.execution.dockerfile_utils.normalize_architecture_name", return_value="gfx942"), \
-             patch.object(Context, "get_ctx_test", return_value="test"), \
-             patch.object(Context, "get_host_os", return_value="linux"):
+        with patch(
+            "madengine.core.context.get_gpu_tool_manager", return_value=manager
+        ), patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor",
+            return_value=GPUVendor.AMD,
+        ), patch(
+            "madengine.execution.dockerfile_utils.normalize_architecture_name",
+            return_value="gfx942",
+        ), patch.object(
+            Context, "get_ctx_test", return_value="test"
+        ), patch.object(
+            Context, "get_host_os", return_value="linux"
+        ):
             ctx.init_build_context(detect_gpu_arch=True)
 
         assert ctx.ctx["docker_build_arg"]["MAD_SYSTEM_GPU_ARCHITECTURE"] == "gfx942"
@@ -140,11 +151,19 @@ def test_auto_detect_does_not_override_user_value(self):
         manager = MagicMock()
         manager.get_gpu_architecture.return_value = "gfx942"
 
-        with patch("madengine.core.context.get_gpu_tool_manager", return_value=manager), \
-             patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.AMD), \
-             patch("madengine.execution.dockerfile_utils.normalize_architecture_name", return_value="gfx942"), \
-             patch.object(Context, "get_ctx_test", return_value="test"), \
-             patch.object(Context, "get_host_os", return_value="linux"):
+        with patch(
+            "madengine.core.context.get_gpu_tool_manager", return_value=manager
+        ), patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor",
+            return_value=GPUVendor.AMD,
+        ), patch(
+            "madengine.execution.dockerfile_utils.normalize_architecture_name",
+            return_value="gfx942",
+        ), patch.object(
+            Context, "get_ctx_test", return_value="test"
+        ), patch.object(
+            Context, "get_host_os", return_value="linux"
+        ):
             ctx.init_build_context(detect_gpu_arch=True)
 
         # User value must be preserved; auto-detect must not overwrite it.
@@ -154,10 +173,14 @@ def test_auto_detect_warns_on_no_gpu(self):
         """Should warn (not crash) when no supported GPU is detected."""
         ctx = _make_build_only_ctx()
 
-        with patch("madengine.utils.gpu_validator.detect_gpu_vendor", return_value=GPUVendor.UNKNOWN), \
-             patch.object(Context, "get_ctx_test", return_value="test"), \
-             patch.object(Context, "get_host_os", return_value="linux"), \
-             patch("builtins.print") as mock_print:
+        with patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor",
+            return_value=GPUVendor.UNKNOWN,
+        ), patch.object(Context, "get_ctx_test", return_value="test"), patch.object(
+            Context, "get_host_os", return_value="linux"
+        ), patch(
+            "builtins.print"
+        ) as mock_print:
             ctx.init_build_context(detect_gpu_arch=True)
 
         msgs = [str(c.args[0]) for c in mock_print.call_args_list if c.args]
@@ -168,10 +191,14 @@ def test_auto_detect_handles_exception_gracefully(self):
         """Detection failure should warn, not raise."""
         ctx = _make_build_only_ctx()
 
-        with patch("madengine.utils.gpu_validator.detect_gpu_vendor", side_effect=RuntimeError("rocminfo not found")), \
-             patch.object(Context, "get_ctx_test", return_value="test"), \
-             patch.object(Context, "get_host_os", return_value="linux"), \
-             patch("builtins.print") as mock_print:
+        with patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor",
+            side_effect=RuntimeError("rocminfo not found"),
+        ), patch.object(Context, "get_ctx_test", return_value="test"), patch.object(
+            Context, "get_host_os", return_value="linux"
+        ), patch(
+            "builtins.print"
+        ) as mock_print:
             ctx.init_build_context(detect_gpu_arch=True)
 
         msgs = [str(c.args[0]) for c in mock_print.call_args_list if c.args]
@@ -182,9 +209,13 @@ def test_no_detection_when_flag_is_false(self):
         """detect_gpu_arch=False should skip detection entirely."""
         ctx = _make_build_only_ctx()
 
-        with patch("madengine.utils.gpu_validator.detect_gpu_vendor") as mock_detect, \
-             patch.object(Context, "get_ctx_test", return_value="test"), \
-             patch.object(Context, "get_host_os", return_value="linux"):
+        with patch(
+            "madengine.utils.gpu_validator.detect_gpu_vendor"
+        ) as mock_detect, patch.object(
+            Context, "get_ctx_test", return_value="test"
+        ), patch.object(
+            Context, "get_host_os", return_value="linux"
+        ):
             ctx.init_build_context(detect_gpu_arch=False)
 
         mock_detect.assert_not_called()
diff --git a/tests/unit/test_database_mongodb.py b/tests/unit/test_database_mongodb.py
index b9f0aa65..d53a70a9 100644
--- a/tests/unit/test_database_mongodb.py
+++ b/tests/unit/test_database_mongodb.py
@@ -12,26 +12,27 @@
 import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, patch
+
 import pytest
 
 from madengine.database.mongodb import (
+    CSVLoader,
+    DocumentTransformer,
+    FileFormat,
+    JSONLoader,
     MongoDBConfig,
     UploadOptions,
     UploadResult,
-    FileFormat,
-    JSONLoader,
-    CSVLoader,
-    DocumentTransformer,
     detect_file_format,
     get_loader,
     upload_file_to_mongodb,
 )
 
-
 # ============================================================================
 # Fixtures
 # ============================================================================
 
+
 @pytest.fixture
 def sample_json_data():
     """Sample JSON data with native types."""
@@ -41,39 +42,31 @@ def sample_json_data():
             "performance": 123.45,
             "metric": "tokens/sec",
             "status": "SUCCESS",
-            "configs": {
-                "batch_size": 32,
-                "learning_rate": 0.001
-            },
+            "configs": {"batch_size": 32, "learning_rate": 0.001},
             "enabled": True,
-            "timestamp": "2026-01-07 10:00:00"
+            "timestamp": "2026-01-07 10:00:00",
         },
         {
             "model": "test_model_2",
             "performance": 234.56,
             "metric": "tokens/sec",
             "status": "SUCCESS",
-            "configs": {
-                "batch_size": 64,
-                "learning_rate": 0.002
-            },
+            "configs": {"batch_size": 64, "learning_rate": 0.002},
             "enabled": False,
-            "timestamp": "2026-01-07 10:05:00"
-        }
+            "timestamp": "2026-01-07 10:05:00",
+        },
     ]
 
 
 @pytest.fixture
 def temp_json_file(sample_json_data):
     """Create a temporary JSON file."""
-    with tempfile.NamedTemporaryFile(
-        mode='w', suffix='.json', delete=False
-    ) as f:
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
         json.dump(sample_json_data, f)
         file_path = f.name
-    
+
     yield Path(file_path)
-    
+
     # Cleanup
     if os.path.exists(file_path):
         os.unlink(file_path)
@@ -83,15 +76,15 @@ def temp_json_file(sample_json_data):
 def temp_csv_file():
     """Create a temporary CSV file."""
     with tempfile.NamedTemporaryFile(
-        mode='w', suffix='.csv', delete=False, newline=''
+        mode="w", suffix=".csv", delete=False, newline=""
     ) as f:
         f.write("model,performance,metric,status,timestamp\n")
         f.write("csv_model_1,345.67,tokens/sec,SUCCESS,2026-01-07 11:00:00\n")
         f.write("csv_model_2,456.78,tokens/sec,SUCCESS,2026-01-07 11:05:00\n")
         file_path = f.name
-    
+
     yield Path(file_path)
-    
+
     # Cleanup
     if os.path.exists(file_path):
         os.unlink(file_path)
@@ -101,11 +94,12 @@ def temp_csv_file():
 # Configuration Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_mongodb_config_defaults():
     """Test MongoDBConfig with default values."""
     config = MongoDBConfig()
-    
+
     assert config.host == "localhost"
     assert config.port == 27017
     assert config.username == ""
@@ -122,10 +116,10 @@ def test_mongodb_config_from_env():
         "MONGO_USER": "testuser",
         "MONGO_PASSWORD": "testpass",
     }
-    
+
     with patch.dict(os.environ, env_vars, clear=False):
         config = MongoDBConfig.from_env()
-        
+
         assert config.host == "test-host"
         assert config.port == 27018
         assert config.username == "testuser"
@@ -136,12 +130,9 @@ def test_mongodb_config_from_env():
 def test_mongodb_config_uri_with_auth():
     """Test MongoDB URI generation with authentication."""
     config = MongoDBConfig(
-        host="example.com",
-        port=27017,
-        username="user",
-        password="pass"
+        host="example.com", port=27017, username="user", password="pass"
     )
-    
+
     assert config.uri == "mongodb://user:pass@example.com:27017/admin"
 
 
@@ -149,7 +140,7 @@ def test_mongodb_config_uri_with_auth():
 def test_mongodb_config_uri_without_auth():
     """Test MongoDB URI generation without authentication."""
     config = MongoDBConfig(host="example.com", port=27017)
-    
+
     assert config.uri == "mongodb://example.com:27017"
 
 
@@ -157,7 +148,7 @@ def test_mongodb_config_uri_without_auth():
 def test_upload_options_defaults():
     """Test UploadOptions default values."""
     options = UploadOptions()
-    
+
     assert options.unique_fields is None
     assert options.upsert is True
     assert options.batch_size == 1000
@@ -171,6 +162,7 @@ def test_upload_options_defaults():
 # File Detection Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_detect_json_format_by_extension(temp_json_file):
     """Test JSON format detection by file extension."""
@@ -188,12 +180,10 @@ def test_detect_csv_format_by_extension(temp_csv_file):
 @pytest.mark.unit
 def test_detect_json_format_by_content():
     """Test JSON format detection by content when no extension."""
-    with tempfile.NamedTemporaryFile(
-        mode='w', suffix='', delete=False
-    ) as f:
+    with tempfile.NamedTemporaryFile(mode="w", suffix="", delete=False) as f:
         json.dump({"test": "data"}, f)
         file_path = f.name
-    
+
     try:
         file_format = detect_file_format(Path(file_path))
         assert file_format == FileFormat.JSON
@@ -219,12 +209,13 @@ def test_get_loader_csv():
 # JSON Loader Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_json_loader_load_array(temp_json_file, sample_json_data):
     """Test JSONLoader with array format."""
     loader = JSONLoader()
     documents = loader.load(temp_json_file)
-    
+
     assert len(documents) == 2
     assert documents[0]["model"] == "test_model_1"
     assert documents[0]["performance"] == 123.45
@@ -236,17 +227,15 @@ def test_json_loader_load_array(temp_json_file, sample_json_data):
 def test_json_loader_load_single_object():
     """Test JSONLoader with single object format."""
     data = {"model": "test", "value": 42}
-    
-    with tempfile.NamedTemporaryFile(
-        mode='w', suffix='.json', delete=False
-    ) as f:
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
         json.dump(data, f)
         file_path = f.name
-    
+
     try:
         loader = JSONLoader()
         documents = loader.load(Path(file_path))
-        
+
         assert len(documents) == 1
         assert documents[0]["model"] == "test"
         assert documents[0]["value"] == 42
@@ -259,7 +248,7 @@ def test_json_loader_preserves_types(temp_json_file):
     """Test that JSONLoader preserves native types."""
     loader = JSONLoader()
     documents = loader.load(temp_json_file)
-    
+
     doc = documents[0]
     assert isinstance(doc["performance"], float)
     assert isinstance(doc["configs"], dict)
@@ -272,7 +261,7 @@ def test_json_loader_infer_schema(sample_json_data):
     """Test JSON schema inference."""
     loader = JSONLoader()
     schema = loader.infer_schema(sample_json_data)
-    
+
     assert schema["model"] == str
     assert schema["performance"] == float
     assert schema["configs"] == dict
@@ -283,12 +272,13 @@ def test_json_loader_infer_schema(sample_json_data):
 # CSV Loader Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_csv_loader_load(temp_csv_file):
     """Test CSVLoader basic loading."""
     loader = CSVLoader()
     documents = loader.load(temp_csv_file)
-    
+
     assert len(documents) == 2
     assert documents[0]["model"] == "csv_model_1"
     assert documents[1]["model"] == "csv_model_2"
@@ -299,7 +289,7 @@ def test_csv_loader_type_inference(temp_csv_file):
     """Test that CSVLoader infers types correctly."""
     loader = CSVLoader()
     documents = loader.load(temp_csv_file)
-    
+
     doc = documents[0]
     # Performance should be float, not string
     assert isinstance(doc["performance"], (float, int))
@@ -310,16 +300,16 @@ def test_csv_loader_type_inference(temp_csv_file):
 def test_csv_loader_json_string_parsing():
     """Test that CSVLoader parses JSON strings in columns."""
     with tempfile.NamedTemporaryFile(
-        mode='w', suffix='.csv', delete=False, newline=''
+        mode="w", suffix=".csv", delete=False, newline=""
     ) as f:
-        f.write('model,configs\n')
+        f.write("model,configs\n")
         f.write('test,"{""lr"": 0.001}"\n')
         file_path = f.name
-    
+
     try:
         loader = CSVLoader()
         documents = loader.load(Path(file_path))
-        
+
         # Should parse JSON string in configs column
         assert isinstance(documents[0]["configs"], (dict, str))
     finally:
@@ -330,17 +320,17 @@ def test_csv_loader_json_string_parsing():
 def test_csv_loader_handles_null_values():
     """Test CSVLoader handles null/missing values."""
     with tempfile.NamedTemporaryFile(
-        mode='w', suffix='.csv', delete=False, newline=''
+        mode="w", suffix=".csv", delete=False, newline=""
     ) as f:
-        f.write('model,value\n')
-        f.write('test1,42\n')
-        f.write('test2,\n')  # Empty value
+        f.write("model,value\n")
+        f.write("test1,42\n")
+        f.write("test2,\n")  # Empty value
         file_path = f.name
-    
+
     try:
         loader = CSVLoader()
         documents = loader.load(Path(file_path))
-        
+
         assert documents[0]["value"] == 42
         assert documents[1]["value"] is None
     finally:
@@ -351,15 +341,16 @@ def test_csv_loader_handles_null_values():
 # Document Transformer Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_document_transformer_adds_metadata():
     """Test that transformer adds metadata fields."""
     options = UploadOptions(add_metadata=True)
     transformer = DocumentTransformer(options)
-    
+
     documents = [{"model": "test", "value": 42}]
     transformed = transformer.transform(documents)
-    
+
     assert "_meta_uploaded_at" in transformed[0]
     assert "created_date" in transformed[0]
 
@@ -369,11 +360,11 @@ def test_document_transformer_preserves_existing_metadata():
     """Test that transformer preserves existing created_date."""
     options = UploadOptions(add_metadata=True)
     transformer = DocumentTransformer(options)
-    
+
     original_date = "2026-01-01 00:00:00"
     documents = [{"model": "test", "created_date": original_date}]
     transformed = transformer.transform(documents)
-    
+
     assert transformed[0]["created_date"] == original_date
 
 
@@ -382,14 +373,14 @@ def test_document_transformer_infer_unique_fields():
     """Test automatic unique field inference."""
     options = UploadOptions()
     transformer = DocumentTransformer(options)
-    
+
     documents = [
         {"model": "model1", "timestamp": "2026-01-01", "value": 1},
         {"model": "model2", "timestamp": "2026-01-02", "value": 2},
     ]
-    
+
     unique_fields = transformer.infer_unique_fields(documents)
-    
+
     assert "model" in unique_fields
 
 
@@ -398,10 +389,10 @@ def test_document_transformer_no_metadata_when_disabled():
     """Test that metadata is not added when disabled."""
     options = UploadOptions(add_metadata=False)
     transformer = DocumentTransformer(options)
-    
+
     documents = [{"model": "test", "value": 42}]
     transformed = transformer.transform(documents)
-    
+
     assert "_meta_uploaded_at" not in transformed[0]
 
 
@@ -409,6 +400,7 @@ def test_document_transformer_no_metadata_when_disabled():
 # Upload Result Tests
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_upload_result_success_status():
     """Test UploadResult with success status."""
@@ -419,9 +411,9 @@ def test_upload_result_success_status():
         documents_inserted=8,
         documents_updated=2,
         documents_failed=0,
-        duration_seconds=1.5
+        duration_seconds=1.5,
     )
-    
+
     assert result.status == "success"
     assert result.documents_read == 10
     assert result.documents_inserted == 8
@@ -439,9 +431,9 @@ def test_upload_result_with_errors():
         documents_updated=1,
         documents_failed=2,
         errors=["Error 1", "Error 2"],
-        duration_seconds=2.0
+        duration_seconds=2.0,
     )
-    
+
     assert result.status == "partial"
     assert result.documents_failed == 2
     assert len(result.errors) == 2
@@ -451,20 +443,21 @@ def test_upload_result_with_errors():
 # Main Upload Function Tests (Mocked)
 # ============================================================================
 
+
 @pytest.mark.unit
 def test_upload_file_to_mongodb_json_dry_run(temp_json_file):
     """Test uploading JSON file in dry-run mode."""
     config = MongoDBConfig()
     options = UploadOptions(dry_run=True)
-    
+
     result = upload_file_to_mongodb(
         file_path=str(temp_json_file),
         database_name="test_db",
         collection_name="test_collection",
         config=config,
-        options=options
+        options=options,
     )
-    
+
     assert result.status == "success"
     assert result.documents_read == 2
     assert result.documents_processed == 0
@@ -476,15 +469,15 @@ def test_upload_file_to_mongodb_csv_dry_run(temp_csv_file):
     """Test uploading CSV file in dry-run mode."""
     config = MongoDBConfig()
     options = UploadOptions(dry_run=True)
-    
+
     result = upload_file_to_mongodb(
         file_path=str(temp_csv_file),
         database_name="test_db",
         collection_name="test_collection",
         config=config,
-        options=options
+        options=options,
     )
-    
+
     assert result.status == "success"
     assert result.documents_read == 2
 
@@ -493,19 +486,16 @@ def test_upload_file_to_mongodb_csv_dry_run(temp_csv_file):
 def test_upload_file_to_mongodb_auto_detects_unique_fields(temp_json_file):
     """Test that upload auto-detects unique fields."""
     config = MongoDBConfig()
-    options = UploadOptions(
-        dry_run=True,
-        unique_fields=None  # Should auto-detect
-    )
-    
+    options = UploadOptions(dry_run=True, unique_fields=None)  # Should auto-detect
+
     result = upload_file_to_mongodb(
         file_path=str(temp_json_file),
         database_name="test_db",
         collection_name="test_collection",
         config=config,
-        options=options
+        options=options,
     )
-    
+
     assert result.status == "success"
     # Options should have been updated with detected fields
     assert options.unique_fields is not None
@@ -516,14 +506,14 @@ def test_upload_file_to_mongodb_file_not_found():
     """Test upload with non-existent file."""
     config = MongoDBConfig()
     options = UploadOptions()
-    
+
     with pytest.raises(FileNotFoundError):
         upload_file_to_mongodb(
             file_path="/nonexistent/file.json",
             database_name="test_db",
             collection_name="test_collection",
             config=config,
-            options=options
+            options=options,
         )
 
 
@@ -531,25 +521,22 @@ def test_upload_file_to_mongodb_file_not_found():
 def test_upload_file_to_mongodb_with_custom_unique_fields(temp_json_file):
     """Test upload with custom unique fields."""
     config = MongoDBConfig()
-    options = UploadOptions(
-        dry_run=True,
-        unique_fields=["model", "timestamp"]
-    )
-    
+    options = UploadOptions(dry_run=True, unique_fields=["model", "timestamp"])
+
     result = upload_file_to_mongodb(
         file_path=str(temp_json_file),
         database_name="test_db",
         collection_name="test_collection",
         config=config,
-        options=options
+        options=options,
     )
-    
+
     assert result.status == "success"
     assert options.unique_fields == ["model", "timestamp"]
 
 
 @pytest.mark.unit
-@patch('madengine.database.mongodb.MongoDBUploader')
+@patch("madengine.database.mongodb.MongoDBUploader")
 def test_upload_file_to_mongodb_calls_uploader(mock_uploader_class, temp_json_file):
     """Test that upload function properly calls MongoDBUploader."""
     # Setup mock
@@ -562,20 +549,20 @@ def test_upload_file_to_mongodb_calls_uploader(mock_uploader_class, temp_json_fi
         documents_inserted=2,
         documents_updated=0,
         documents_failed=0,
-        duration_seconds=0.1
+        duration_seconds=0.1,
     )
-    
+
     config = MongoDBConfig()
     options = UploadOptions(dry_run=False)
-    
+
     result = upload_file_to_mongodb(
         file_path=str(temp_json_file),
         database_name="test_db",
         collection_name="test_collection",
         config=config,
-        options=options
+        options=options,
     )
-    
+
     # Verify uploader was called
     mock_uploader.upload.assert_called_once()
     assert result.status == "success"
diff --git a/tests/unit/test_deployment.py b/tests/unit/test_deployment.py
index 4d5ad4dd..b7530779 100644
--- a/tests/unit/test_deployment.py
+++ b/tests/unit/test_deployment.py
@@ -15,9 +15,9 @@
     tools_include_rocprof_family,
 )
 
-
 # ---- deployment.base (create_jinja_env) ----
 
+
 class TestCreateJinjaEnv:
     """Test create_jinja_env helper."""
 
@@ -25,7 +25,9 @@ def test_returns_environment_with_dirname_basename_filters(self):
         """create_jinja_env returns Environment with dirname and basename filters."""
         with tempfile.TemporaryDirectory() as tmpdir:
             p = Path(tmpdir)
-            (p / "test.j2").write_text("dir={{ path | dirname }} name={{ path | basename }}")
+            (p / "test.j2").write_text(
+                "dir={{ path | dirname }} name={{ path | basename }}"
+            )
             env = create_jinja_env(p)
             template = env.get_template("test.j2")
             out = template.render(path="/foo/bar/baz.txt")
@@ -43,6 +45,7 @@ def test_template_dir_must_exist(self):
 
 # ---- deployment.common ----
 
+
 class TestValidLaunchers:
     """VALID_LAUNCHERS constant."""
 
@@ -79,7 +82,9 @@ def test_detects_rocprof_and_presets(self):
 
     def test_false_for_rocm_trace_lite(self):
         assert tools_include_rocprof_family([{"name": "rocm_trace_lite"}]) is False
-        assert tools_include_rocprof_family([{"name": "rocm_trace_lite_default"}]) is False
+        assert (
+            tools_include_rocprof_family([{"name": "rocm_trace_lite_default"}]) is False
+        )
 
 
 class TestIsRocprofv3Available:
@@ -107,6 +112,7 @@ def test_returns_false_when_not_found(self):
 
     def test_returns_false_on_timeout(self):
         import subprocess
+
         with patch("madengine.deployment.common.subprocess.run") as m:
             m.side_effect = subprocess.TimeoutExpired("rocprofv3", 5)
             assert is_rocprofv3_available() is False
@@ -191,17 +197,29 @@ def test_multi_node_other_tools_unchanged(self, _mock_avail):
 
 # ---- BaseDeployment._parse_performance_from_log ----
 
+
 class _ConcreteDeployment(BaseDeployment):
     """Minimal concrete subclass to exercise BaseDeployment methods under test."""
 
     DEPLOYMENT_TYPE = "test"
 
-    def validate(self): pass
-    def prepare(self): pass
-    def deploy(self): pass
-    def monitor(self, deployment_id): pass
-    def collect_results(self, deployment_id): pass
-    def cleanup(self, deployment_id): pass
+    def validate(self):
+        pass
+
+    def prepare(self):
+        pass
+
+    def deploy(self):
+        pass
+
+    def monitor(self, deployment_id):
+        pass
+
+    def collect_results(self, deployment_id):
+        pass
+
+    def cleanup(self, deployment_id):
+        pass
 
 
 def _make_deployment():
diff --git a/tests/unit/test_discover_models.py b/tests/unit/test_discover_models.py
index a8bbe43c..86084be8 100644
--- a/tests/unit/test_discover_models.py
+++ b/tests/unit/test_discover_models.py
@@ -57,7 +57,9 @@ def test_unscoped_inference_still_matches_all_repos(self):
 
     def test_colon_in_tag_not_treated_as_scoped(self):
         """model:arg keeps legacy behavior (no scope/tag split on /)."""
-        dm = DiscoverModels(args=argparse.Namespace(tags=["MAD-private/foo:batch-size=32"]))
+        dm = DiscoverModels(
+            args=argparse.Namespace(tags=["MAD-private/foo:batch-size=32"])
+        )
         dm.models = [
             {"name": "MAD-private/foo", "tags": [], "args": ""},
         ]
@@ -113,7 +115,8 @@ def test_unscoped_tag_does_not_cross_scope_boundary(self):
 
     def test_unscoped_tag_matches_scoped_models_by_tag_field(self):
         """--tags inference matches any model carrying that tag, regardless of scope prefix.
-        Tag-list matching is always scope-agnostic; only name-based matching is scope-strict."""
+        Tag-list matching is always scope-agnostic; only name-based matching is scope-strict.
+        """
         dm = DiscoverModels(args=argparse.Namespace(tags=["inference"]))
         dm.models = [
             {"name": "MAD/pyt_foo", "tags": ["inference"], "args": ""},
@@ -121,7 +124,10 @@ def test_unscoped_tag_matches_scoped_models_by_tag_field(self):
         ]
         dm.custom_models = []
         dm.select_models()
-        assert sorted(m["name"] for m in dm.selected_models) == ["MAD/pyt_bar", "MAD/pyt_foo"]
+        assert sorted(m["name"] for m in dm.selected_models) == [
+            "MAD/pyt_bar",
+            "MAD/pyt_foo",
+        ]
 
     def test_unscoped_all_selects_every_model(self):
         """--tags all selects every model regardless of scope."""
@@ -132,7 +138,10 @@ def test_unscoped_all_selects_every_model(self):
         ]
         dm.custom_models = []
         dm.select_models()
-        assert sorted(m["name"] for m in dm.selected_models) == ["MAD/pyt_bar", "pyt_foo"]
+        assert sorted(m["name"] for m in dm.selected_models) == [
+            "MAD/pyt_bar",
+            "pyt_foo",
+        ]
 
     def test_unscoped_tag_matches_root_and_scoped_by_tag_field(self):
         """--tags inference selects root AND scoped models that carry that tag."""
@@ -143,7 +152,10 @@ def test_unscoped_tag_matches_root_and_scoped_by_tag_field(self):
         ]
         dm.custom_models = []
         dm.select_models()
-        assert sorted(m["name"] for m in dm.selected_models) == ["MAD/pyt_foo", "root_model"]
+        assert sorted(m["name"] for m in dm.selected_models) == [
+            "MAD/pyt_foo",
+            "root_model",
+        ]
 
     def test_unscoped_tag_with_extra_args_matches_by_tag_field(self):
         """--tags inference:batch-size=32 selects by tag 'inference', not 'inference:batch-size=32'."""
diff --git a/tests/unit/test_docker_builder.py b/tests/unit/test_docker_builder.py
index 3fe97f9b..e08003ac 100644
--- a/tests/unit/test_docker_builder.py
+++ b/tests/unit/test_docker_builder.py
@@ -35,7 +35,9 @@ def test_create_registry_image_name_uses_dockerhub_repository(docker_builder):
     assert out == "myorg/ci:ci-dummy_dummy.ubuntu.amd"
 
 
-def test_create_registry_image_name_without_credentials_matches_local_tag(docker_builder):
+def test_create_registry_image_name_without_credentials_matches_local_tag(
+    docker_builder,
+):
     out = docker_builder._create_registry_image_name(
         "ci-dummy_dummy.ubuntu.amd",
         "dockerhub",
diff --git a/tests/unit/test_error_handling.py b/tests/unit/test_error_handling.py
index dc210a0b..c1240c08 100644
--- a/tests/unit/test_error_handling.py
+++ b/tests/unit/test_error_handling.py
@@ -30,7 +30,7 @@
     set_error_handler,
     get_error_handler,
     handle_error,
-    create_error_context
+    create_error_context,
 )
 
 
@@ -45,7 +45,7 @@ def test_error_context_creation(self):
             phase="execution",
             component="TestComponent",
             model_name="test_model",
-            additional_info=additional_info
+            additional_info=additional_info,
         )
 
         assert context.operation == "test_operation"
@@ -57,7 +57,7 @@ def test_error_context_creation(self):
 
 class TestMADEngineErrorHierarchy:
     """Test madengine error class hierarchy."""
-    
+
     def test_base_madengine_error(self):
         """Test base madengine error functionality."""
         context = ErrorContext(operation="test")
@@ -66,9 +66,9 @@ def test_base_madengine_error(self):
             category=ErrorCategory.RUNTIME,
             context=context,
             recoverable=True,
-            suggestions=["Try again", "Check logs"]
+            suggestions=["Try again", "Check logs"],
         )
-        
+
         assert str(error) == "Test error"
         assert error.message == "Test error"
         assert error.category == ErrorCategory.RUNTIME
@@ -76,24 +76,27 @@ def test_base_madengine_error(self):
         assert error.recoverable is True
         assert error.suggestions == ["Try again", "Check logs"]
         assert error.cause is None
-    
-    @pytest.mark.parametrize("error_class,category,recoverable,message", [
-        (ValidationError, ErrorCategory.VALIDATION, True, "Invalid input"),
-        (NetworkError, ErrorCategory.CONNECTION, True, "Connection failed"),
-        (BuildError, ErrorCategory.BUILD, False, "Build failed"),
-        (RunnerError, ErrorCategory.RUNNER, True, "Runner execution failed"),
-        (AuthenticationError, ErrorCategory.AUTHENTICATION, True, "Auth failed"),
-        (ConfigurationError, ErrorCategory.CONFIGURATION, True, "Config error"),
-    ])
+
+    @pytest.mark.parametrize(
+        "error_class,category,recoverable,message",
+        [
+            (ValidationError, ErrorCategory.VALIDATION, True, "Invalid input"),
+            (NetworkError, ErrorCategory.CONNECTION, True, "Connection failed"),
+            (BuildError, ErrorCategory.BUILD, False, "Build failed"),
+            (RunnerError, ErrorCategory.RUNNER, True, "Runner execution failed"),
+            (AuthenticationError, ErrorCategory.AUTHENTICATION, True, "Auth failed"),
+            (ConfigurationError, ErrorCategory.CONFIGURATION, True, "Config error"),
+        ],
+    )
     def test_error_types(self, error_class, category, recoverable, message):
         """Test all error types with parametrized test."""
         error = error_class(message)
-        
+
         assert isinstance(error, MADEngineError)
         assert error.category == category
         assert error.recoverable is recoverable
         assert str(error) == message
-    
+
     def test_error_with_cause(self):
         """Test error with underlying cause."""
         original_error = ValueError("Original error")
@@ -120,13 +123,10 @@ def setup_method(self):
     def test_handle_madengine_error(self):
         """Test handling of madengine structured errors."""
         context = create_error_context(
-            operation="test_operation",
-            component="TestComponent"
+            operation="test_operation", component="TestComponent"
         )
         error = ValidationError(
-            "Test validation error",
-            context=context,
-            suggestions=["Check input"]
+            "Test validation error", context=context, suggestions=["Check input"]
         )
 
         self.error_handler.handle_error(error)
@@ -160,8 +160,6 @@ def test_set_and_get_error_handler(self):
         assert retrieved_handler == handler
 
 
-
-
 class TestErrorRecoveryAndSuggestions:
     """Test error recovery indicators and suggestions."""
 
@@ -176,15 +174,13 @@ def test_non_recoverable_errors(self):
         assert BuildError("Build error").recoverable is False
 
 
-
-
 class TestErrorPatternMatching:
     """Test error pattern matching for log analysis.
-    
+
     These tests validate the error pattern fixes for GPT2 training,
     ensuring ROCProf logs are correctly excluded while real errors are caught.
     """
-    
+
     @pytest.fixture
     def benign_patterns(self):
         """Benign patterns that should be excluded from error detection."""
@@ -199,7 +195,7 @@ def benign_patterns(self):
             "rocpd_op:",
             "rpd_tracer:",
         ]
-    
+
     @pytest.fixture
     def error_patterns(self):
         """Error patterns that should be detected in logs."""
@@ -218,7 +214,7 @@ def error_patterns(self):
             "ImportError:",
             "ModuleNotFoundError:",
         ]
-    
+
     def test_benign_patterns_match_rocprof_logs(self, benign_patterns):
         """Test that benign patterns correctly match ROCProf logs."""
         # Test cases that should be excluded (false positives)
@@ -230,11 +226,11 @@ def test_benign_patterns_match_rocprof_logs(self, benign_patterns):
             "rocpd_op: 0",
             "rpd_tracer: finalized in 50.142105 ms",
         ]
-        
+
         for test_line in rocprof_messages:
             matched = any(re.search(pattern, test_line) for pattern in benign_patterns)
             assert matched, f"Failed to match ROCProf log: {test_line[:80]}"
-    
+
     def test_error_patterns_catch_real_errors(self, error_patterns):
         """Test that error patterns correctly catch real errors."""
         # Test cases that should be caught (real errors)
@@ -247,11 +243,11 @@ def test_error_patterns_catch_real_errors(self, error_patterns):
             "AssertionError: Expected shape (2, 3) but got (3, 2)",
             "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: FAILED",
         ]
-        
+
         for test_line in real_errors:
             matched = any(re.search(pattern, test_line) for pattern in error_patterns)
             assert matched, f"Failed to catch error: {test_line[:80]}"
-    
+
     def test_rocprof_messages_dont_trigger_errors(self, error_patterns):
         """Test that ROCProf messages don't trigger error patterns."""
         # ROCProf messages that should NOT trigger errors
@@ -261,11 +257,13 @@ def test_rocprof_messages_dont_trigger_errors(self, error_patterns):
             "rocpd_op: 0",
             "rpd_tracer: finalized in 50.142105 ms",
         ]
-        
+
         for test_line in rocprof_messages:
             matched = any(re.search(pattern, test_line) for pattern in error_patterns)
-            assert not matched, f"False positive: {test_line[:80]} matched error pattern"
+            assert (
+                not matched
+            ), f"False positive: {test_line[:80]} matched error pattern"
 
 
 if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
\ No newline at end of file
+    pytest.main([__file__, "-v"])
diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py
index 078e2d57..fa2b32fc 100644
--- a/tests/unit/test_errors.py
+++ b/tests/unit/test_errors.py
@@ -24,6 +24,7 @@
 
 # ---- CLI error integration ----
 
+
 class TestCLIErrorIntegration:
     """CLI error handling setup and display."""
 
@@ -50,7 +51,9 @@ def test_build_command_error_handling(self):
 
         setup_logging(verbose=False)
         error = Exception("Test build error")
-        context = create_error_context(operation="build", phase="build", component="CLI")
+        context = create_error_context(
+            operation="build", phase="build", component="CLI"
+        )
         handle_error(error, context=context)
 
     @patch("madengine.cli.utils.console")
@@ -62,7 +65,9 @@ def test_cli_error_display_consistency(self, mock_console):
         handler = get_error_handler()
         error = ConfigurationError(
             "Invalid configuration",
-            context=create_error_context(operation="cli_command", component="CLI", phase="validation"),
+            context=create_error_context(
+                operation="cli_command", component="CLI", phase="validation"
+            ),
         )
         handler.handle_error(error)
         assert handler.console is not None
@@ -70,6 +75,7 @@ def test_cli_error_display_consistency(self, mock_console):
 
 # ---- Error workflow ----
 
+
 class TestErrorWorkflow:
     """End-to-end error flow and logging."""
 
@@ -134,11 +140,14 @@ def test_error_context_serialization(self):
         )
         error = ExecutionError("Model execution failed", context=context)
         data = json.dumps(error.context.__dict__, default=str)
-        assert "model_execution" in data and "ContainerRunner" in data and "abc123" in data
+        assert (
+            "model_execution" in data and "ContainerRunner" in data and "abc123" in data
+        )
 
 
 # ---- Unified error system ----
 
+
 class TestUnifiedErrorSystem:
     """Unified error handling system."""
 
@@ -147,7 +156,9 @@ def test_error_system_basic_functionality(self):
         mock_console = Mock()
         handler = ErrorHandler(console=mock_console, verbose=False)
         context = create_error_context(
-            operation="test_operation", component="TestComponent", model_name="test_model"
+            operation="test_operation",
+            component="TestComponent",
+            model_name="test_model",
         )
         error = ValidationError("Test validation error", context=context)
         handler.handle_error(error)
@@ -218,7 +229,9 @@ def test_global_error_handler_workflow(self):
         set_error_handler(handler)
         error = ValidationError(
             "Global handler test",
-            context=create_error_context(operation="global_test", component="TestGlobalHandler"),
+            context=create_error_context(
+                operation="global_test", component="TestGlobalHandler"
+            ),
         )
         handle_error(error)
         mock_console.print.assert_called_once()
@@ -241,14 +254,20 @@ def test_error_suggestions_and_recovery(self):
 
     def test_nested_error_handling(self):
         """Nested errors with cause chain are handled."""
-        from madengine.core.errors import ExecutionError as MADRuntimeError, OrchestrationError, NetworkError
+        from madengine.core.errors import (
+            ExecutionError as MADRuntimeError,
+            OrchestrationError,
+            NetworkError,
+        )
 
         orig = NetworkError("Network timeout")
         runtime = MADRuntimeError("Operation failed", cause=orig)
         final = OrchestrationError("Orchestration failed", cause=runtime)
         assert final.cause == runtime and runtime.cause == orig
         mock_console = Mock()
-        ErrorHandler(console=mock_console, verbose=True).handle_error(final, show_traceback=True)
+        ErrorHandler(console=mock_console, verbose=True).handle_error(
+            final, show_traceback=True
+        )
         assert mock_console.print.call_count >= 1
 
     def test_error_performance(self):
@@ -261,7 +280,9 @@ def test_error_performance(self):
         for i in range(100):
             err = ValidationError(
                 f"Test error {i}",
-                context=create_error_context(operation=f"test_op_{i}", component="PerformanceTest"),
+                context=create_error_context(
+                    operation=f"test_op_{i}", component="PerformanceTest"
+                ),
             )
             handler.handle_error(err)
         assert time.time() - start < 1.0
@@ -270,6 +291,7 @@ def test_error_performance(self):
 
 # ---- Performance (lightweight) ----
 
+
 class TestErrorHandlingPerformance:
     """Error handler and context creation performance."""
 
@@ -290,13 +312,17 @@ def test_error_context_creation_performance(self):
         start = time.time()
         for i in range(1000):
             create_error_context(
-                operation=f"op_{i}", component=f"C_{i}", phase="test", model_name=f"m_{i}"
+                operation=f"op_{i}",
+                component=f"C_{i}",
+                phase="test",
+                model_name=f"m_{i}",
             )
         assert time.time() - start < 0.1
 
 
 # ---- Backward compatibility ----
 
+
 class TestErrorSystemBackwardCompatibility:
     """Backward compatibility of the error system."""
 
@@ -307,7 +333,9 @@ def test_legacy_exception_handling_still_works(self):
         except Exception as e:
             mock_console = Mock()
             handler = ErrorHandler(console=mock_console)
-            context = create_error_context(operation="legacy_handling", component="LegacyTest")
+            context = create_error_context(
+                operation="legacy_handling", component="LegacyTest"
+            )
             handler.handle_error(e, context=context)
             mock_console.print.assert_called_once()
 
diff --git a/tests/unit/test_execution.py b/tests/unit/test_execution.py
index dc18121e..c17ab291 100644
--- a/tests/unit/test_execution.py
+++ b/tests/unit/test_execution.py
@@ -16,9 +16,9 @@
     parse_dockerfile_gpu_variables,
 )
 
-
 # ---- Timeout ----
 
+
 class TestTimeout:
     """Timeout context manager: None/0 must not arm signal.alarm."""
 
@@ -34,11 +34,13 @@ def test_positive_seconds_raises_on_expiry(self):
         with pytest.raises(TimeoutError):
             with Timeout(1):
                 import time
+
                 time.sleep(2)
 
 
 # ---- container_runner_helpers ----
 
+
 class TestResolveRunTimeout:
     """resolve_run_timeout behavior."""
 
@@ -59,8 +61,13 @@ def test_falsy_model_timeout_ignored_uses_cli(self, model_timeout):
         assert resolve_run_timeout({"timeout": model_timeout}, 7200) == 7200
 
     def test_custom_default_cli(self):
-        assert resolve_run_timeout({"timeout": 100}, 5000, default_cli_timeout=5000) == 100
-        assert resolve_run_timeout({"timeout": 100}, 7200, default_cli_timeout=5000) == 7200
+        assert (
+            resolve_run_timeout({"timeout": 100}, 5000, default_cli_timeout=5000) == 100
+        )
+        assert (
+            resolve_run_timeout({"timeout": 100}, 7200, default_cli_timeout=5000)
+            == 7200
+        )
 
     def test_no_timeout_sentinel_none_passthrough(self):
         # --timeout 0 is converted to None by the CLI; resolve_run_timeout must
@@ -94,7 +101,9 @@ class TestMakeRunLogFilePath:
 
     def test_basic_format(self):
         out = make_run_log_file_path(
-            {"name": "org/model"}, "ci-org_model_ubuntu.22.04", "",
+            {"name": "org/model"},
+            "ci-org_model_ubuntu.22.04",
+            "",
         )
         assert out == "org_model_ubuntu.22.04.live.log"
 
@@ -104,7 +113,9 @@ def test_phase_suffix_appended(self):
 
     def test_slashes_in_model_name_replaced(self):
         out = make_run_log_file_path(
-            {"name": "foo/bar/baz"}, "ci-foo_bar_baz_ubuntu", "",
+            {"name": "foo/bar/baz"},
+            "ci-foo_bar_baz_ubuntu",
+            "",
         )
         assert "/" not in out
         assert out.endswith(".live.log")
@@ -116,14 +127,18 @@ def test_image_without_ci_prefix(self):
 
     def test_no_model_prefix_in_image(self):
         out = make_run_log_file_path(
-            {"name": "other/model"}, "ci-some_ubuntu_22", "",
+            {"name": "other/model"},
+            "ci-some_ubuntu_22",
+            "",
         )
         assert out == "other_model_some_ubuntu_22.live.log"
 
     def test_full_registry_ref_matches_short_ci_tag(self):
         """Run log name must match build log base when image is registry/name:ci-…."""
         model = {"name": "primus_pretrain/torchtitan_MI300X_qwen3_4B-pretrain"}
-        short = "ci-primus_pretrain_torchtitan_mi300x_qwen3_4b-pretrain_primus.ubuntu.amd"
+        short = (
+            "ci-primus_pretrain_torchtitan_mi300x_qwen3_4b-pretrain_primus.ubuntu.amd"
+        )
         full = f"rocm/mad-private:{short}"
         assert make_run_log_file_path(model, short, ".run") == make_run_log_file_path(
             model, full, ".run"
@@ -136,6 +151,7 @@ def test_full_registry_ref_matches_short_ci_tag(self):
 
 # ---- dockerfile_utils ----
 
+
 class TestGpuArchVariables:
     def test_contains_expected_vars(self):
         assert "MAD_SYSTEM_GPU_ARCHITECTURE" in GPU_ARCH_VARIABLES
@@ -183,22 +199,32 @@ def test_empty_returns_none(self):
 
 class TestIsTargetArchCompatibleWithVariable:
     def test_mad_system_always_compatible(self):
-        assert is_target_arch_compatible_with_variable(
-            "MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx90a"], "gfx908"
-        ) is True
+        assert (
+            is_target_arch_compatible_with_variable(
+                "MAD_SYSTEM_GPU_ARCHITECTURE", ["gfx90a"], "gfx908"
+            )
+            is True
+        )
 
     def test_multi_arch_target_in_list(self):
-        assert is_target_arch_compatible_with_variable(
-            "PYTORCH_ROCM_ARCH", ["gfx90a", "gfx908"], "gfx90a"
-        ) is True
-        assert is_target_arch_compatible_with_variable(
-            "GPU_TARGETS", ["gfx90a"], "gfx908"
-        ) is False
+        assert (
+            is_target_arch_compatible_with_variable(
+                "PYTORCH_ROCM_ARCH", ["gfx90a", "gfx908"], "gfx90a"
+            )
+            is True
+        )
+        assert (
+            is_target_arch_compatible_with_variable("GPU_TARGETS", ["gfx90a"], "gfx908")
+            is False
+        )
 
     def test_gfx_compilation_exact_match(self):
-        assert is_target_arch_compatible_with_variable(
-            "GFX_COMPILATION_ARCH", ["gfx90a"], "gfx90a"
-        ) is True
+        assert (
+            is_target_arch_compatible_with_variable(
+                "GFX_COMPILATION_ARCH", ["gfx90a"], "gfx90a"
+            )
+            is True
+        )
 
 
 class TestIsCompilationArchCompatible:
diff --git a/tests/unit/test_hydra_config_loader.py b/tests/unit/test_hydra_config_loader.py
new file mode 100644
index 00000000..6330937b
--- /dev/null
+++ b/tests/unit/test_hydra_config_loader.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Tests for HydraConfigLoader."""
+
+import pytest
+from omegaconf import DictConfig
+
+from madengine.config.loader import HydraConfigLoader
+from madengine.core.errors import ConfigurationError
+
+
+class TestParseArgs:
+    """_parse_args splits CLI tokens into (user YAML file, Hydra overrides)."""
+
+    def test_hydra_overrides_only(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["scheduler=slurm", "distributed.nnodes=4"]
+        )
+        assert user_file is None
+        assert overrides == ["scheduler=slurm", "distributed.nnodes=4"]
+
+    def test_yaml_file_only(self):
+        user_file, overrides = HydraConfigLoader._parse_args(["/path/to/config.yaml"])
+        assert user_file == "/path/to/config.yaml"
+        assert overrides == []
+
+    def test_yaml_file_with_overrides(self):
+        user_file, overrides = HydraConfigLoader._parse_args(
+            ["/path/to/config.yaml", "distributed.nnodes=8"]
+        )
+        assert user_file == "/path/to/config.yaml"
+        assert overrides == ["distributed.nnodes=8"]
+
+    def test_yml_extension_recognized(self):
+        user_file, overrides = HydraConfigLoader._parse_args(["/path/to/config.yml"])
+        assert user_file == "/path/to/config.yml"
+        # A recognized file must not also be forwarded as a Hydra override.
+        assert overrides == []
+
+    def test_multiple_yaml_files_raises(self):
+        with pytest.raises(ConfigurationError, match="Only one YAML"):
+            HydraConfigLoader._parse_args(["/path/a.yaml", "/path/b.yaml"])
+
+    def test_append_override_not_treated_as_file(self):
+        user_file, overrides = HydraConfigLoader._parse_args(["+profile=mi300x_8gpu"])
+        assert user_file is None
+        assert overrides == ["+profile=mi300x_8gpu"]
+
+    def test_empty_args(self):
+        user_file, overrides = HydraConfigLoader._parse_args([])
+        assert user_file is None
+        assert overrides == []
+
+
+class TestLoad:
+    """HydraConfigLoader.load returns a DictConfig reflecting the given args."""
+
+    def test_defaults_only(self):
+        cfg = HydraConfigLoader.load([])
+        assert isinstance(cfg, DictConfig)
+        assert cfg.gpu_vendor == "AMD"
+        assert cfg.guest_os == "UBUNTU"
+        assert cfg.distributed.enabled is False
+
+    def test_scheduler_override(self):
+        cfg = HydraConfigLoader.load(["scheduler=slurm"])
+        assert "slurm" in cfg
+        assert cfg.slurm.partition == "amd-rccl"
+
+    def test_launcher_override(self):
+        cfg = HydraConfigLoader.load(["launcher=torchrun"])
+        assert cfg.distributed.enabled is True
+        assert cfg.distributed.launcher == "torchrun"
+
+    def test_inline_value_override(self):
+        cfg = HydraConfigLoader.load(["launcher=torchrun", "distributed.nnodes=4"])
+        assert cfg.distributed.nnodes == 4
+
+    def test_append_profile(self):
+        cfg = HydraConfigLoader.load(["+profile=mi300x_8gpu"])
+        assert cfg.gpu_type == "mi300x"
+        assert cfg.distributed.nproc_per_node == 8
+
+    def test_user_yaml_file(self, tmp_path):
+        # Write and close the file before load() reads it; pytest removes tmp_path.
+        user_yaml = tmp_path / "user_config.yaml"
+        user_yaml.write_text(
+            "debug: true\nenv_vars:\n  MY_VAR: hello\n", encoding="utf-8"
+        )
+
+        cfg = HydraConfigLoader.load([str(user_yaml)])
+
+        assert cfg.debug is True
+        assert cfg.env_vars.MY_VAR == "hello"
+
+    def test_user_yaml_with_overrides(self, tmp_path):
+        user_yaml = tmp_path / "user_config.yaml"
+        user_yaml.write_text("debug: true\n", encoding="utf-8")
+
+        cfg = HydraConfigLoader.load([str(user_yaml), "scheduler=slurm"])
+
+        assert cfg.debug is True
+        assert "slurm" in cfg
+
+    def test_hardware_nvidia(self):
+        cfg = HydraConfigLoader.load(["hardware=nvidia"])
+        assert cfg.gpu_vendor == "NVIDIA"
+        assert cfg.runtime.use_gpu_flag is True
diff --git a/tests/unit/test_k8s.py b/tests/unit/test_k8s.py
index 3391b70c..883d0381 100644
--- a/tests/unit/test_k8s.py
+++ b/tests/unit/test_k8s.py
@@ -17,11 +17,11 @@
     SECRETS_STRATEGY_EXISTING,
     SECRETS_STRATEGY_FROM_LOCAL,
     SECRETS_STRATEGY_OMIT,
+    build_registry_secret_data,
     estimate_configmap_payload_bytes,
     merge_secrets_config,
     resolve_image_pull_secret_refs,
     resolve_runtime_secret_name,
-    build_registry_secret_data,
 )
 from madengine.deployment.kubernetes import (
     _pod_job_name_label_selector,
@@ -102,9 +102,7 @@ def test_resolve_runtime_secret_name_existing():
 
 
 def test_resolve_runtime_secret_name_omit_optional():
-    assert (
-        resolve_runtime_secret_name(SECRETS_STRATEGY_OMIT, {}, None) is None
-    )
+    assert resolve_runtime_secret_name(SECRETS_STRATEGY_OMIT, {}, None) is None
 
 
 def test_estimate_skips_credential_when_not_in_configmap():
@@ -123,9 +121,15 @@ def test_estimate_skips_credential_when_not_in_configmap():
 
 def test_pvc_match_exact():
     assigned: set = set()
-    assert match_pvc_subdir_to_k8s_pod("my-pod", ["my-pod", "my-pod-0-abc"], assigned) == "my-pod"
+    assert (
+        match_pvc_subdir_to_k8s_pod("my-pod", ["my-pod", "my-pod-0-abc"], assigned)
+        == "my-pod"
+    )
     assigned.add("my-pod")
-    assert match_pvc_subdir_to_k8s_pod("my-pod", ["my-pod", "my-pod-0-abc"], assigned) == "my-pod-0-abc"
+    assert (
+        match_pvc_subdir_to_k8s_pod("my-pod", ["my-pod", "my-pod-0-abc"], assigned)
+        == "my-pod-0-abc"
+    )
 
 
 def test_pvc_match_prefix_indexed_job():
diff --git a/tests/unit/test_orchestration.py b/tests/unit/test_orchestration.py
index ece59067..9b89d36e 100644
--- a/tests/unit/test_orchestration.py
+++ b/tests/unit/test_orchestration.py
@@ -1,25 +1,25 @@
 """Unit tests for orchestration: image_filtering and orchestrator init/validation."""
 
 import json
+from unittest.mock import MagicMock, patch
 
 import pytest
-from unittest.mock import MagicMock, patch
 
-from madengine.orchestration.image_filtering import (
-    filter_images_by_gpu_compatibility,
-    filter_images_by_skip_gpu_arch,
-)
 from madengine.core.additional_context_defaults import (
     DEFAULT_GPU_VENDOR,
     DEFAULT_GUEST_OS,
 )
+from madengine.core.errors import ConfigurationError
 from madengine.orchestration.build_orchestrator import BuildOrchestrator
+from madengine.orchestration.image_filtering import (
+    filter_images_by_gpu_compatibility,
+    filter_images_by_skip_gpu_arch,
+)
 from madengine.orchestration.run_orchestrator import RunOrchestrator
-from madengine.core.errors import ConfigurationError
-
 
 # ---- image_filtering ----
 
+
 class TestFilterImagesByGpuCompatibility:
     """filter_images_by_gpu_compatibility behavior."""
 
@@ -94,9 +94,7 @@ def test_skip_gpu_arch_match_skipped(self):
     def test_skip_gpu_arch_nvidia_prefix_normalized(self):
         built = {"m1": {}}
         models = {"m1": {"skip_gpu_arch": "A100"}}
-        compat, skipped = filter_images_by_skip_gpu_arch(
-            built, models, "NVIDIA A100"
-        )
+        compat, skipped = filter_images_by_skip_gpu_arch(built, models, "NVIDIA A100")
         assert compat == {}
         assert skipped[0][2] == "NVIDIA A100"
 
@@ -110,6 +108,7 @@ def test_skip_gpu_arch_no_match_included(self):
 
 # ---- orchestrator init and validation ----
 
+
 @pytest.mark.unit
 class TestBuildOrchestratorInit:
     """Test Build Orchestrator initialization."""
@@ -221,7 +220,9 @@ def test_skip_after_build_skips_execute_local(self, mock_cleanup, tmp_path):
 
         orchestrator = RunOrchestrator(mock_args)
 
-        with patch.object(RunOrchestrator, "_build_phase", return_value=str(manifest_path)):
+        with patch.object(
+            RunOrchestrator, "_build_phase", return_value=str(manifest_path)
+        ):
             with patch.object(
                 RunOrchestrator, "_load_and_merge_manifest", side_effect=lambda f: f
             ):
@@ -267,7 +268,9 @@ def test_skip_ignored_when_run_only_still_calls_execute_local(
                 "successful_runs": [],
                 "failed_runs": [],
             }
-            orchestrator.execute(manifest_file=str(manifest_path), tags=None, timeout=60)
+            orchestrator.execute(
+                manifest_file=str(manifest_path), tags=None, timeout=60
+            )
 
         mock_local.assert_called_once()
         mock_cleanup.assert_called()
diff --git a/tests/unit/test_primus.py b/tests/unit/test_primus.py
index 36e187b0..63790cd0 100644
--- a/tests/unit/test_primus.py
+++ b/tests/unit/test_primus.py
@@ -19,14 +19,15 @@
     merged_primus_config,
 )
 
-
 # --- K8s mixin: _generate_primus_command -------------------------------------
 
 
 class _PrimusCommandHarness(KubernetesLauncherMixin):
     """Minimal object with attributes _generate_primus_command expects."""
 
-    def __init__(self, additional_context, job_name="madengine-test", namespace="default"):
+    def __init__(
+        self, additional_context, job_name="madengine-test", namespace="default"
+    ):
         self.config = SimpleNamespace(additional_context=additional_context)
         self.job_name = job_name
         self.namespace = namespace
@@ -72,7 +73,9 @@ def test_single_node_with_backend_override(self):
                 }
             }
         )
-        cmd = h._generate_primus_command(1, 4, 1234, "scripts/primus_pretrain/run.sh", "")
+        cmd = h._generate_primus_command(
+            1, 4, 1234, "scripts/primus_pretrain/run.sh", ""
+        )
         assert 'export BACKEND="MaxText"' in cmd
         assert "PRIMUS_CONFIG_PATH=" in cmd
 
@@ -80,7 +83,9 @@ def test_multi_node_service_dns(self):
         h = _PrimusCommandHarness(
             {"distributed": {"primus": {}}}, job_name="madengine-j", namespace="ns1"
         )
-        cmd = h._generate_primus_command(2, 8, 1234, "scripts/primus_pretrain/run.sh", "")
+        cmd = h._generate_primus_command(
+            2, 8, 1234, "scripts/primus_pretrain/run.sh", ""
+        )
         assert "madengine-j-0.madengine-j.ns1.svc.cluster.local" in cmd
         assert "JOB_COMPLETION_INDEX" in cmd
         assert "NNODES=2" in cmd
@@ -94,7 +99,9 @@ def test_multi_node_master_dns_uses_short_headless_subdomain(self):
             {"distributed": {"primus": {}}}, job_name=long_job, namespace="ns1"
         )
         h.service_name = sub
-        cmd = h._generate_primus_command(2, 8, 1234, "scripts/primus_pretrain/run.sh", "")
+        cmd = h._generate_primus_command(
+            2, 8, 1234, "scripts/primus_pretrain/run.sh", ""
+        )
         assert f"{long_job}-0.{sub}.ns1.svc.cluster.local" in cmd
 
 
diff --git a/tests/unit/test_reporting.py b/tests/unit/test_reporting.py
index 844137e9..799e887f 100644
--- a/tests/unit/test_reporting.py
+++ b/tests/unit/test_reporting.py
@@ -1,15 +1,12 @@
 """Unit tests for reporting: update_perf_csv and PERF_CSV_HEADER."""
 
+import json
 import os
 import tempfile
 
-import json
 import pandas as pd
 
-from madengine.reporting.update_perf_csv import (
-    PERF_CSV_HEADER,
-    update_perf_csv,
-)
+from madengine.reporting.update_perf_csv import PERF_CSV_HEADER, update_perf_csv
 
 
 class TestPerfCsvHeader:
diff --git a/tests/unit/test_reporting_superset.py b/tests/unit/test_reporting_superset.py
index 622c9b75..a7bfe808 100644
--- a/tests/unit/test_reporting_superset.py
+++ b/tests/unit/test_reporting_superset.py
@@ -9,14 +9,17 @@
 
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
+
 # built-in modules
 import os
 import json
 import tempfile
 import shutil
+
 # 3rd party modules
 import pytest
 import pandas as pd
+
 # project modules
 from madengine.utils.config_parser import ConfigParser
 from madengine.reporting.update_perf_super import (
@@ -36,126 +39,111 @@ def tmp_dir(self):
         yield temp_dir
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
-    
+
     @pytest.fixture
     def fixtures_dir(self):
         """Get path to dummy fixtures directory."""
         return os.path.join(
-            os.path.dirname(__file__),
-            '..',
-            'fixtures',
-            'dummy',
-            'scripts',
-            'dummy'
+            os.path.dirname(__file__), "..", "fixtures", "dummy", "scripts", "dummy"
         )
-    
+
     @pytest.fixture
     def config_file(self, fixtures_dir):
         """Get path to config file."""
-        return os.path.join(fixtures_dir, 'configs', 'default.csv')
-    
+        return os.path.join(fixtures_dir, "configs", "default.csv")
+
     def test_config_file_exists(self, config_file):
         """Test that the dummy config file exists."""
-        assert os.path.exists(config_file), \
-            f"Config file should exist at {config_file}"
-    
+        assert os.path.exists(config_file), f"Config file should exist at {config_file}"
+
     def test_config_parser_loads_csv(self, config_file):
         """Test that ConfigParser can load the dummy CSV config."""
         parser = ConfigParser()
         configs = parser.load_config_file(config_file)
-        
+
         assert configs is not None, "Configs should not be None"
         assert isinstance(configs, list), "Configs should be a list"
         assert len(configs) == 3, "Should have 3 config rows"
-        
+
         # Check first config has expected fields
         first_config = configs[0]
-        assert 'model' in first_config
-        assert 'benchmark' in first_config
-        assert 'config_value' in first_config
-        assert 'batch_size' in first_config
-        assert 'datatype' in first_config
-        assert 'max_tokens' in first_config
-        
+        assert "model" in first_config
+        assert "benchmark" in first_config
+        assert "config_value" in first_config
+        assert "batch_size" in first_config
+        assert "datatype" in first_config
+        assert "max_tokens" in first_config
+
         # Verify values
-        assert first_config['model'] == 'dummy/model-1'
-        assert first_config['benchmark'] == 'throughput'
-        assert first_config['datatype'] == 'float16'
-        assert first_config['batch_size'] == 8
-        assert first_config['config_value'] == 128
-        assert first_config['max_tokens'] == 1024
-    
+        assert first_config["model"] == "dummy/model-1"
+        assert first_config["benchmark"] == "throughput"
+        assert first_config["datatype"] == "float16"
+        assert first_config["batch_size"] == 8
+        assert first_config["config_value"] == 128
+        assert first_config["max_tokens"] == 1024
+
     def test_config_parser_from_args(self, fixtures_dir):
         """Test parsing config path from args string."""
         parser = ConfigParser(scripts_base_dir=fixtures_dir)
         args_string = "--config configs/default.csv"
-        
+
         config_path = parser.parse_config_from_args(
-            args_string,
-            os.path.join(fixtures_dir, 'run.sh')
+            args_string, os.path.join(fixtures_dir, "run.sh")
         )
-        
+
         assert config_path is not None, "Config path should be found"
-        assert os.path.exists(config_path), \
-            f"Config file should exist at {config_path}"
-    
+        assert os.path.exists(config_path), f"Config file should exist at {config_path}"
+
     def test_config_parser_parse_and_load(self, fixtures_dir):
         """Test parse_and_load convenience method."""
         parser = ConfigParser(scripts_base_dir=fixtures_dir)
         args_string = "--batch-size 32 --config configs/default.csv"
-        
+
         configs = parser.parse_and_load(args_string, fixtures_dir)
-        
+
         assert configs is not None, "Configs should be loaded"
         assert isinstance(configs, list), "Configs should be a list"
         assert len(configs) == 3, "Should have 3 config rows"
-    
+
     def test_config_parser_no_config_arg(self, fixtures_dir):
         """Test handling when no --config argument is present."""
         parser = ConfigParser(scripts_base_dir=fixtures_dir)
         args_string = "--batch-size 32 --epochs 10"
-        
+
         configs = parser.parse_and_load(args_string, fixtures_dir)
-        
+
         assert configs is None, "Should return None when no config argument"
-    
+
     def test_config_parser_match_config_to_result(self, config_file):
         """Test matching configs to results."""
         parser = ConfigParser()
         configs = parser.load_config_file(config_file)
-        
+
         # Test matching with model name
-        result_data = {
-            'model': 'dummy/model-1',
-            'benchmark': 'throughput'
-        }
-        
-        matched = parser.match_config_to_result(configs, result_data, 'dummy/model-1')
-        
+        result_data = {"model": "dummy/model-1", "benchmark": "throughput"}
+
+        matched = parser.match_config_to_result(configs, result_data, "dummy/model-1")
+
         assert matched is not None, "Should match a config"
-        assert matched['model'] == 'dummy/model-1'
-        assert matched['benchmark'] == 'throughput'
-    
+        assert matched["model"] == "dummy/model-1"
+        assert matched["benchmark"] == "throughput"
+
     def test_config_parser_json_file(self, tmp_dir):
         """Test loading JSON config file."""
         # Create a JSON config file
-        json_config = {
-            "batch_size": 32,
-            "learning_rate": 0.001,
-            "epochs": 10
-        }
-        
+        json_config = {"batch_size": 32, "learning_rate": 0.001, "epochs": 10}
+
         json_path = os.path.join(tmp_dir, "config.json")
-        with open(json_path, 'w') as f:
+        with open(json_path, "w") as f:
             json.dump(json_config, f)
 
         parser = ConfigParser()
         configs = parser.load_config_file(json_path)
-        
+
         assert configs is not None, "Configs should be loaded"
         assert isinstance(configs, dict), "JSON config should be a dict"
-        assert configs['batch_size'] == 32
-        assert configs['learning_rate'] == 0.001
+        assert configs["batch_size"] == 32
+        assert configs["learning_rate"] == 0.001
 
 
 class TestPerfEntrySuperGeneration:
@@ -168,19 +156,14 @@ def tmp_dir(self):
         yield temp_dir
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)
-    
+
     @pytest.fixture
     def fixtures_dir(self):
         """Get path to dummy fixtures directory."""
         return os.path.join(
-            os.path.dirname(__file__),
-            '..',
-            'fixtures',
-            'dummy',
-            'scripts',
-            'dummy'
+            os.path.dirname(__file__), "..", "fixtures", "dummy", "scripts", "dummy"
         )
-    
+
     def test_perf_entry_super_json_structure(self, tmp_dir, fixtures_dir):
         """Test that perf_super.json has the correct structure."""
         # Create mock data
@@ -214,12 +197,12 @@ def test_perf_entry_super_json_structure(self, tmp_dir, fixtures_dir):
 
         # Create common_info.json
         common_info_path = os.path.join(tmp_dir, "common_info.json")
-        with open(common_info_path, 'w') as f:
+        with open(common_info_path, "w") as f:
             json.dump(common_info, f)
-        
+
         # Create results CSV
         results_csv = os.path.join(tmp_dir, "perf_dummy_super.csv")
-        with open(results_csv, 'w') as f:
+        with open(results_csv, "w") as f:
             f.write("model,performance,metric,status\n")
             f.write("dummy/model-1,1234.56,tokens/s,SUCCESS\n")
             f.write("dummy/model-2,2345.67,requests/s,SUCCESS\n")
@@ -227,54 +210,61 @@ def test_perf_entry_super_json_structure(self, tmp_dir, fixtures_dir):
 
         # Generate perf_super.json (cumulative)
         perf_super_path = os.path.join(tmp_dir, "perf_super.json")
-        
+
         update_perf_super_json(
             perf_super_json=perf_super_path,
             multiple_results=results_csv,
             common_info=common_info_path,
             model_name="dummy_perf_super",
-            scripts_base_dir=fixtures_dir
+            scripts_base_dir=fixtures_dir,
         )
-        
+
         # Verify file was created
-        assert os.path.exists(perf_super_path), \
-            "perf_super.json should be created"
-        
+        assert os.path.exists(perf_super_path), "perf_super.json should be created"
+
         # Load and verify structure
-        with open(perf_super_path, 'r') as f:
+        with open(perf_super_path, "r") as f:
             data = json.load(f)
-        
+
         assert isinstance(data, list), "Data should be a list"
         assert len(data) == 3, "Should have 3 result records"
-        
+
         # Check first record structure
         first_record = data[0]
-        
+
         # Verify all common fields are present
         required_fields = [
-            'model', 'performance', 'metric', 'status', 'pipeline',
-            'n_gpus', 'args', 'tags', 'gpu_architecture'
+            "model",
+            "performance",
+            "metric",
+            "status",
+            "pipeline",
+            "n_gpus",
+            "args",
+            "tags",
+            "gpu_architecture",
         ]
         for field in required_fields:
             assert field in first_record, f"Field '{field}' should be present"
-        
+
         # Verify configs field is present
-        assert 'configs' in first_record, "configs field should be present"
-        
+        assert "configs" in first_record, "configs field should be present"
+
         # Verify configs is not None (config file was found and loaded)
-        assert first_record['configs'] is not None, \
-            "configs should not be None when config file exists"
-        
+        assert (
+            first_record["configs"] is not None
+        ), "configs should not be None when config file exists"
+
         # Verify configs has expected structure
-        configs = first_record['configs']
+        configs = first_record["configs"]
         assert isinstance(configs, dict), "configs should be a dict"
-        assert 'model' in configs
-        assert 'benchmark' in configs
-        assert 'config_value' in configs
-        assert 'batch_size' in configs
-        assert 'datatype' in configs
-        assert 'max_tokens' in configs
-    
+        assert "model" in configs
+        assert "benchmark" in configs
+        assert "config_value" in configs
+        assert "batch_size" in configs
+        assert "datatype" in configs
+        assert "max_tokens" in configs
+
     def test_perf_entry_super_config_matching(self, tmp_dir, fixtures_dir):
         """Test that configs are correctly matched for all results."""
         # Create mock data
@@ -305,14 +295,14 @@ def test_perf_entry_super_config_matching(self, tmp_dir, fixtures_dir):
             "build_number": "",
             "additional_docker_run_options": "",
         }
-        
+
         common_info_path = os.path.join(tmp_dir, "common_info_super.json")
-        with open(common_info_path, 'w') as f:
+        with open(common_info_path, "w") as f:
             json.dump(common_info, f)
 
         # Create results CSV
         results_csv = os.path.join(tmp_dir, "perf_dummy_super.csv")
-        with open(results_csv, 'w') as f:
+        with open(results_csv, "w") as f:
             f.write("model,performance,metric,benchmark\n")
             f.write("dummy/model-1,1234.56,tokens/s,throughput\n")
             f.write("dummy/model-2,2345.67,requests/s,serving\n")
@@ -325,33 +315,33 @@ def test_perf_entry_super_config_matching(self, tmp_dir, fixtures_dir):
             multiple_results=results_csv,
             common_info=common_info_path,
             model_name="dummy_perf_super",
-            scripts_base_dir=fixtures_dir
+            scripts_base_dir=fixtures_dir,
         )
 
         # Load and verify matching
-        with open(perf_super_path, 'r') as f:
+        with open(perf_super_path, "r") as f:
             data = json.load(f)
-        
+
         # Verify each result has configs
         assert len(data) == 3, "Should have 3 results"
-        
+
         for record in data:
-            configs = record.get('configs')
+            configs = record.get("configs")
             assert configs is not None, "Each record should have configs"
             assert isinstance(configs, dict), "Configs should be a dict"
-            
+
             # Verify configs have expected structure (from default.csv)
-            assert 'model' in configs
-            assert 'benchmark' in configs
-            assert 'config_value' in configs
-            assert 'batch_size' in configs
-            assert 'datatype' in configs
-            assert 'max_tokens' in configs
-            
+            assert "model" in configs
+            assert "benchmark" in configs
+            assert "config_value" in configs
+            assert "batch_size" in configs
+            assert "datatype" in configs
+            assert "max_tokens" in configs
+
             # Verify configs values are from our config file
-            assert configs['benchmark'] in ['throughput', 'serving', 'latency']
-            assert configs['datatype'] in ['float16', 'float32', 'bfloat16']
-    
+            assert configs["benchmark"] in ["throughput", "serving", "latency"]
+            assert configs["datatype"] in ["float16", "float32", "bfloat16"]
+
     def test_perf_entry_super_no_config(self, tmp_dir, fixtures_dir):
         """Test handling when no config file is specified."""
         # Create mock data without config
@@ -382,37 +372,38 @@ def test_perf_entry_super_no_config(self, tmp_dir, fixtures_dir):
             "build_number": "",
             "additional_docker_run_options": "",
         }
-        
+
         common_info_path = os.path.join(tmp_dir, "common_info_super.json")
-        with open(common_info_path, 'w') as f:
+        with open(common_info_path, "w") as f:
             json.dump(common_info, f)
 
         # Create results CSV
         results_csv = os.path.join(tmp_dir, "perf_dummy_super.csv")
-        with open(results_csv, 'w') as f:
+        with open(results_csv, "w") as f:
             f.write("model,performance,metric\n")
             f.write("dummy-no-config,1234.56,tokens/s\n")
 
         perf_super_path = os.path.join(tmp_dir, "perf_super.json")
-        
+
         update_perf_super_json(
             perf_super_json=perf_super_path,
             multiple_results=results_csv,
             common_info=common_info_path,
             model_name="dummy_no_config",
-            scripts_base_dir=fixtures_dir
+            scripts_base_dir=fixtures_dir,
         )
-        
+
         # Load and verify
-        with open(perf_super_path, 'r') as f:
+        with open(perf_super_path, "r") as f:
             data = json.load(f)
-        
+
         assert len(data) == 1, "Should have 1 result"
-        
+
         # Verify configs is None when no config file
-        assert data[0]['configs'] is None, \
-            "configs should be None when no config file specified"
-    
+        assert (
+            data[0]["configs"] is None
+        ), "configs should be None when no config file specified"
+
     def test_perf_entry_super_multi_results(self, tmp_dir, fixtures_dir):
         """Test handling of multiple result metrics."""
         common_info = {
@@ -442,18 +433,20 @@ def test_perf_entry_super_multi_results(self, tmp_dir, fixtures_dir):
             "build_number": "",
             "additional_docker_run_options": "",
         }
-        
+
         common_info_path = os.path.join(tmp_dir, "common_info_super.json")
-        with open(common_info_path, 'w') as f:
+        with open(common_info_path, "w") as f:
             json.dump(common_info, f)
 
         # Create results CSV with extra metrics
         results_csv = os.path.join(tmp_dir, "perf_multi_metrics.csv")
-        with open(results_csv, 'w') as f:
-            f.write("model,performance,metric,throughput,latency_mean_ms,latency_p50_ms,latency_p90_ms,gpu_memory_used_mb\n")
+        with open(results_csv, "w") as f:
+            f.write(
+                "model,performance,metric,throughput,latency_mean_ms,latency_p50_ms,latency_p90_ms,gpu_memory_used_mb\n"
+            )
             f.write("model-1,1234.56,tokens/s,1234.56,8.1,7.9,12.3,12288\n")
             f.write("model-2,2345.67,requests/s,2345.67,4.3,4.1,6.8,16384\n")
-        
+
         perf_super_path = os.path.join(tmp_dir, "perf_super.json")
 
         update_perf_super_json(
@@ -461,35 +454,37 @@ def test_perf_entry_super_multi_results(self, tmp_dir, fixtures_dir):
             multiple_results=results_csv,
             common_info=common_info_path,
             model_name="test_multi_metrics",
-            scripts_base_dir=fixtures_dir
+            scripts_base_dir=fixtures_dir,
         )
-        
+
         # Load and verify
-        with open(perf_super_path, 'r') as f:
+        with open(perf_super_path, "r") as f:
             data = json.load(f)
-        
+
         assert len(data) == 2, "Should have 2 results"
-        
+
         # Check first result has multi_results with extra metrics
         first_result = data[0]
-        assert 'multi_results' in first_result, "Should have multi_results field"
-        assert first_result['multi_results'] is not None, "multi_results should not be None"
-        
-        multi_results = first_result['multi_results']
+        assert "multi_results" in first_result, "Should have multi_results field"
+        assert (
+            first_result["multi_results"] is not None
+        ), "multi_results should not be None"
+
+        multi_results = first_result["multi_results"]
         assert isinstance(multi_results, dict), "multi_results should be a dict"
-        
+
         # Verify extra metrics are in multi_results
-        assert 'throughput' in multi_results
-        assert 'latency_mean_ms' in multi_results
-        assert 'latency_p50_ms' in multi_results
-        assert 'latency_p90_ms' in multi_results
-        assert 'gpu_memory_used_mb' in multi_results
-        
+        assert "throughput" in multi_results
+        assert "latency_mean_ms" in multi_results
+        assert "latency_p50_ms" in multi_results
+        assert "latency_p90_ms" in multi_results
+        assert "gpu_memory_used_mb" in multi_results
+
         # Verify values
-        assert multi_results['throughput'] == 1234.56
-        assert multi_results['latency_mean_ms'] == 8.1
-        assert multi_results['gpu_memory_used_mb'] == 12288
-    
+        assert multi_results["throughput"] == 1234.56
+        assert multi_results["latency_mean_ms"] == 8.1
+        assert multi_results["gpu_memory_used_mb"] == 12288
+
     def test_perf_entry_super_deployment_fields(self, tmp_dir, fixtures_dir):
         """Test that all deployment-related fields are present."""
         common_info = {
@@ -519,17 +514,17 @@ def test_perf_entry_super_deployment_fields(self, tmp_dir, fixtures_dir):
             "build_number": "",
             "additional_docker_run_options": "",
         }
-        
+
         common_info_path = os.path.join(tmp_dir, "common_info_super.json")
-        with open(common_info_path, 'w') as f:
+        with open(common_info_path, "w") as f:
             json.dump(common_info, f)
 
         # Create results CSV
         results_csv = os.path.join(tmp_dir, "perf_deployment.csv")
-        with open(results_csv, 'w') as f:
+        with open(results_csv, "w") as f:
             f.write("model,performance,metric\n")
             f.write("multi-node-test,5000.0,tokens/s\n")
-        
+
         perf_super_path = os.path.join(tmp_dir, "perf_super.json")
 
         update_perf_super_json(
@@ -537,17 +532,17 @@ def test_perf_entry_super_deployment_fields(self, tmp_dir, fixtures_dir):
             multiple_results=results_csv,
             common_info=common_info_path,
             model_name="test_deployment",
-            scripts_base_dir=fixtures_dir
+            scripts_base_dir=fixtures_dir,
         )
-        
+
         # Load and verify
-        with open(perf_super_path, 'r') as f:
+        with open(perf_super_path, "r") as f:
             data = json.load(f)
-        
+
         assert len(data) == 1, "Should have 1 result"
-        
+
         result = data[0]
-        
+
         # Verify all deployment fields are present
         deployment_fields = {
             "n_gpus": "16",
@@ -557,11 +552,12 @@ def test_perf_entry_super_deployment_fields(self, tmp_dir, fixtures_dir):
             "launcher": "torchrun",
             "machine_name": "node-1",
         }
-        
+
         for field, expected_value in deployment_fields.items():
             assert field in result, f"Field '{field}' should be present"
-            assert result[field] == expected_value, \
-                f"Field '{field}' should be '{expected_value}', got '{result[field]}'"
+            assert (
+                result[field] == expected_value
+            ), f"Field '{field}' should be '{expected_value}', got '{result[field]}'"
 
 
 class TestPerfSuperCSVGeneration:
@@ -596,52 +592,51 @@ def test_csv_generation_from_json(self, tmp_dir):
                 "status": "SUCCESS",
                 "configs": {"batch_size": 64, "learning_rate": 0.002},
                 "multi_results": None,
-            }
+            },
         ]
-        
+
         json_path = os.path.join(tmp_dir, "perf_super.json")
-        with open(json_path, 'w') as f:
+        with open(json_path, "w") as f:
             json.dump(data, f)
 
         # Change to test directory
         original_dir = os.getcwd()
         os.chdir(tmp_dir)
-        
+
         try:
             # Generate CSVs
             update_perf_super_csv(
-                perf_super_json="perf_super.json",
-                perf_super_csv="perf_super.csv"
+                perf_super_json="perf_super.json", perf_super_csv="perf_super.csv"
             )
-            
+
             # Verify files exist
-            assert os.path.exists("perf_entry_super.csv"), \
-                "perf_entry_super.csv should be created"
-            assert os.path.exists("perf_super.csv"), \
-                "perf_super.csv should be created"
-            
+            assert os.path.exists(
+                "perf_entry_super.csv"
+            ), "perf_entry_super.csv should be created"
+            assert os.path.exists("perf_super.csv"), "perf_super.csv should be created"
+
             # Load and verify perf_entry_super.csv (latest entry only)
             entry_df = pd.read_csv("perf_entry_super.csv")
             assert len(entry_df) == 1, "Should have 1 entry (latest)"
-            assert entry_df.iloc[0]['model'] == "test_model_2"
-            
+            assert entry_df.iloc[0]["model"] == "test_model_2"
+
             # Load and verify perf_super.csv (all entries)
             super_df = pd.read_csv("perf_super.csv")
             assert len(super_df) == 2, "Should have 2 entries (all)"
-            
+
             # Verify configs column is JSON string
-            assert 'configs' in super_df.columns
-            first_configs = json.loads(super_df.iloc[0]['configs'])
-            assert first_configs['batch_size'] == 32
-            
+            assert "configs" in super_df.columns
+            first_configs = json.loads(super_df.iloc[0]["configs"])
+            assert first_configs["batch_size"] == 32
+
             # Verify multi_results column
-            assert 'multi_results' in super_df.columns
-            first_multi = json.loads(super_df.iloc[0]['multi_results'])
-            assert first_multi['throughput'] == 1234.56
-            
+            assert "multi_results" in super_df.columns
+            first_multi = json.loads(super_df.iloc[0]["multi_results"])
+            assert first_multi["throughput"] == 1234.56
+
         finally:
             os.chdir(original_dir)
-    
+
     def test_csv_handles_none_values(self, tmp_dir):
         """Test that CSV generation handles None values correctly."""
         data = [
@@ -655,31 +650,33 @@ def test_csv_handles_none_values(self, tmp_dir):
         ]
 
         json_path = os.path.join(tmp_dir, "perf_super.json")
-        with open(json_path, 'w') as f:
+        with open(json_path, "w") as f:
             json.dump(data, f)
 
         original_dir = os.getcwd()
         os.chdir(tmp_dir)
-        
+
         try:
             update_perf_super_csv(
-                perf_super_json="perf_super.json",
-                perf_super_csv="perf_super.csv"
+                perf_super_json="perf_super.json", perf_super_csv="perf_super.csv"
             )
-            
+
             # Load CSV
             df = pd.read_csv("perf_super.csv")
-            
+
             # Verify None values are handled
-            assert pd.isna(df.iloc[0]['configs']) or df.iloc[0]['configs'] == ''
-            assert pd.isna(df.iloc[0]['multi_results']) or df.iloc[0]['multi_results'] == ''
-            
+            assert pd.isna(df.iloc[0]["configs"]) or df.iloc[0]["configs"] == ""
+            assert (
+                pd.isna(df.iloc[0]["multi_results"])
+                or df.iloc[0]["multi_results"] == ""
+            )
+
         finally:
             os.chdir(original_dir)
-    
+
     def test_csv_multiple_entries_in_entry_file(self, tmp_dir):
         """Test that perf_entry_super.csv can contain multiple entries from current run.
-        
+
         This tests the fix for the issue where perf_entry.csv and perf_entry.json
         had 4 entries (for multiple results) but perf_entry_super.csv only had 1.
         Now perf_entry_super.csv should contain all entries from the current run.
@@ -732,46 +729,54 @@ def test_csv_multiple_entries_in_entry_file(self, tmp_dir):
                 "status": "SUCCESS",
                 "configs": None,
                 "multi_results": {"temperature": 45678},
-            }
+            },
         ]
-        
+
         json_path = os.path.join(tmp_dir, "perf_super.json")
-        with open(json_path, 'w') as f:
+        with open(json_path, "w") as f:
             json.dump(data, f)
 
         original_dir = os.getcwd()
         os.chdir(tmp_dir)
-        
+
         try:
             # Generate CSVs with num_entries=4 (simulating 4 entries added in current run)
             update_perf_super_csv(
                 perf_super_json="perf_super.json",
                 perf_super_csv="perf_super.csv",
-                num_entries=4
+                num_entries=4,
             )
-            
+
             # Verify perf_entry_super.csv has ALL 4 entries from current run
             entry_df = pd.read_csv("perf_entry_super.csv")
-            assert len(entry_df) == 4, \
-                f"perf_entry_super.csv should have 4 entries, got {len(entry_df)}"
-            
+            assert (
+                len(entry_df) == 4
+            ), f"perf_entry_super.csv should have 4 entries, got {len(entry_df)}"
+
             # Verify the models are the 4 from the current run (not the old one)
-            models = entry_df['model'].tolist()
-            expected_models = ['dummy_multi_1', 'dummy_multi_2', 'dummy_multi_3', 'dummy_multi_4']
-            assert models == expected_models, \
-                f"Expected {expected_models}, got {models}"
-            
+            models = entry_df["model"].tolist()
+            expected_models = [
+                "dummy_multi_1",
+                "dummy_multi_2",
+                "dummy_multi_3",
+                "dummy_multi_4",
+            ]
+            assert (
+                models == expected_models
+            ), f"Expected {expected_models}, got {models}"
+
             # Verify perf_super.csv has ALL 5 entries (old + new)
             super_df = pd.read_csv("perf_super.csv")
-            assert len(super_df) == 5, \
-                f"perf_super.csv should have 5 entries (1 old + 4 new), got {len(super_df)}"
-            
+            assert (
+                len(super_df) == 5
+            ), f"perf_super.csv should have 5 entries (1 old + 4 new), got {len(super_df)}"
+
             # Verify all models are in perf_super.csv
-            all_models = super_df['model'].tolist()
-            assert 'old_model' in all_models, "Old model should be in perf_super.csv"
-            assert all(m in all_models for m in expected_models), \
-                "All new models should be in perf_super.csv"
-            
+            all_models = super_df["model"].tolist()
+            assert "old_model" in all_models, "Old model should be in perf_super.csv"
+            assert all(
+                m in all_models for m in expected_models
+            ), "All new models should be in perf_super.csv"
+
         finally:
             os.chdir(original_dir)
-
diff --git a/tests/unit/test_rocm_path.py b/tests/unit/test_rocm_path.py
index 2f2cbf7a..9f5cb41d 100644
--- a/tests/unit/test_rocm_path.py
+++ b/tests/unit/test_rocm_path.py
@@ -5,6 +5,7 @@
 """
 
 import os
+
 import pytest
 
 from madengine.core.constants import get_rocm_path
@@ -70,18 +71,25 @@ def test_context_build_only_mad_rocm_path(self):
 
     def test_context_runtime_includes_rocm_path_in_ctx(self):
         """Context stores host rocm_path; in-container ROCM_PATH is set at run time."""
-        from madengine.core.context import Context
         from unittest.mock import patch
+
+        from madengine.core.context import Context
         from madengine.utils.rocm_path_resolver import normalize_rocm_path
 
         ac = repr({MAD_ROCM_PATH: "/my/rocm"})
-        with patch.object(Context, "get_gpu_vendor", return_value="AMD"), \
-             patch.object(Context, "get_system_ngpus", return_value=2), \
-             patch.object(Context, "get_system_gpu_architecture", return_value="gfx90a"), \
-             patch.object(Context, "get_system_gpu_product_name", return_value="MI250"), \
-             patch.object(Context, "get_system_hip_version", return_value="5.4"), \
-             patch.object(Context, "get_docker_gpus", return_value="0-1"), \
-             patch.object(Context, "get_gpu_renderD_nodes", return_value=None):
+        with patch.object(Context, "get_gpu_vendor", return_value="AMD"), patch.object(
+            Context, "get_system_ngpus", return_value=2
+        ), patch.object(
+            Context, "get_system_gpu_architecture", return_value="gfx90a"
+        ), patch.object(
+            Context, "get_system_gpu_product_name", return_value="MI250"
+        ), patch.object(
+            Context, "get_system_hip_version", return_value="5.4"
+        ), patch.object(
+            Context, "get_docker_gpus", return_value="0-1"
+        ), patch.object(
+            Context, "get_gpu_renderD_nodes", return_value=None
+        ):
             ctx = Context(additional_context=ac)
             exp = normalize_rocm_path("/my/rocm")
             assert ctx.ctx.get("rocm_path") == exp
@@ -90,8 +98,9 @@ def test_context_runtime_includes_rocm_path_in_ctx(self):
 
     def test_context_container_rocm_path_preserved_at_init(self):
         """docker_env_vars.ROCM_PATH is preserved at context init; finalize normalizes at run time."""
-        from madengine.core.context import Context
         from unittest.mock import patch
+
+        from madengine.core.context import Context
         from madengine.utils.rocm_path_resolver import normalize_rocm_path
 
         ac = repr(
@@ -100,13 +109,19 @@ def test_context_container_rocm_path_preserved_at_init(self):
                 "docker_env_vars": {"ROCM_PATH": "/in/image"},
             }
         )
-        with patch.object(Context, "get_gpu_vendor", return_value="AMD"), \
-             patch.object(Context, "get_system_ngpus", return_value=2), \
-             patch.object(Context, "get_system_gpu_architecture", return_value="gfx90a"), \
-             patch.object(Context, "get_system_gpu_product_name", return_value="MI250"), \
-             patch.object(Context, "get_system_hip_version", return_value="5.4"), \
-             patch.object(Context, "get_docker_gpus", return_value="0-1"), \
-             patch.object(Context, "get_gpu_renderD_nodes", return_value=None):
+        with patch.object(Context, "get_gpu_vendor", return_value="AMD"), patch.object(
+            Context, "get_system_ngpus", return_value=2
+        ), patch.object(
+            Context, "get_system_gpu_architecture", return_value="gfx90a"
+        ), patch.object(
+            Context, "get_system_gpu_product_name", return_value="MI250"
+        ), patch.object(
+            Context, "get_system_hip_version", return_value="5.4"
+        ), patch.object(
+            Context, "get_docker_gpus", return_value="0-1"
+        ), patch.object(
+            Context, "get_gpu_renderD_nodes", return_value=None
+        ):
             ctx = Context(additional_context=ac)
         assert ctx._rocm_path == normalize_rocm_path("/on/host")
         # User-supplied ROCM_PATH is kept in docker_env_vars at init; finalize normalizes at run time.
@@ -257,9 +272,7 @@ def test_rocm_root_from_bin_tool_amd_smi(self, tmp_path):
         smi.chmod(0o755)
         assert _rocm_root_from_bin_tool(str(smi.resolve())) == root.resolve()
 
-    def test_auto_detect_finds_injected_versioned_opt_rocm(
-        self, monkeypatch, tmp_path
-    ):
+    def test_auto_detect_finds_injected_versioned_opt_rocm(self, monkeypatch, tmp_path):
         """Simulate /opt/rocm-7.13.0 without depending on the host /opt tree."""
         vroot = (tmp_path / "rocm-7.13.0").resolve()
         (vroot / "bin").mkdir(parents=True)
@@ -287,15 +300,11 @@ def merged_looks(p):
             return real_looks(p)
 
         monkeypatch.setattr(rpr, "_looks_like_rocm_root", merged_looks)
-        monkeypatch.setattr(
-            rpr, "_versioned_opt_rocm_dirs", lambda: [vroot]
-        )
+        monkeypatch.setattr(rpr, "_versioned_opt_rocm_dirs", lambda: [vroot])
         out = auto_detect_rocm_path()
         assert out == normalize_rocm_path(str(vroot))
 
-    def test_infer_root_from_path_tools_amd_smi(
-        self, monkeypatch, tmp_path
-    ):
+    def test_infer_root_from_path_tools_amd_smi(self, monkeypatch, tmp_path):
         """`which(amd-smi)` → .../rocm-7.13.0/bin/amd-smi` yields root with both smi tools."""
         vroot = (tmp_path / "rocm-7.13.0").resolve()
         (vroot / "bin").mkdir(parents=True)
diff --git a/tests/unit/test_therock_markers.py b/tests/unit/test_therock_markers.py
index c819562e..eb01295a 100644
--- a/tests/unit/test_therock_markers.py
+++ b/tests/unit/test_therock_markers.py
@@ -2,9 +2,10 @@
 Unit tests for TheRock on-disk marker helpers.
 """
 
-import pytest
 from pathlib import Path
 
+import pytest
+
 from madengine.utils.therock_markers import (
     is_therock_tree,
     therock_dist_info_path,
@@ -28,5 +29,8 @@ def test_is_therock_tree_false_without_markers(tmp_path: Path) -> None:
 @pytest.mark.unit
 def test_relpath_helpers_match_expected_layout() -> None:
     root = Path("/opt/rocm-7.0.0")
-    assert therock_manifest_path(root) == root / "share" / "therock" / "therock_manifest.json"
+    assert (
+        therock_manifest_path(root)
+        == root / "share" / "therock" / "therock_manifest.json"
+    )
     assert therock_dist_info_path(root) == root / "share" / "therock" / "dist_info.json"
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 2e09b8c3..2ac5633c 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -6,16 +6,16 @@
 
 import pytest
 
-from madengine.utils.path_utils import scripts_base_dir_from, get_madengine_root
+from madengine.utils.path_utils import get_madengine_root, scripts_base_dir_from
 from madengine.utils.run_details import (
-    get_pipeline,
-    get_build_number,
     flatten_tags_in_place,
+    get_build_number,
+    get_pipeline,
 )
 
-
 # ---- path_utils ----
 
+
 class TestScriptsBaseDirFrom:
     """Test scripts_base_dir_from helper."""
 
@@ -51,8 +51,11 @@ def test_returns_madengine_package_path(self):
 
 # ---- run_details ----
 
+
 class TestGetPipeline:
-    @pytest.mark.parametrize("env_val,expected", [({}, ""), ({"pipeline": "ci-mad"}, "ci-mad")])
+    @pytest.mark.parametrize(
+        "env_val,expected", [({}, ""), ({"pipeline": "ci-mad"}, "ci-mad")]
+    )
     def test_pipeline_from_env_or_default(self, env_val, expected):
         with patch.dict(os.environ, env_val, clear=False):
             if not env_val and "pipeline" in os.environ:
@@ -61,7 +64,9 @@ def test_pipeline_from_env_or_default(self, env_val, expected):
 
 
 class TestGetBuildNumber:
-    @pytest.mark.parametrize("env_val,expected", [({}, "0"), ({"BUILD_NUMBER": "42"}, "42")])
+    @pytest.mark.parametrize(
+        "env_val,expected", [({}, "0"), ({"BUILD_NUMBER": "42"}, "42")]
+    )
     def test_build_number_from_env_or_default(self, env_val, expected):
         with patch.dict(os.environ, env_val, clear=False):
             if not env_val and "BUILD_NUMBER" in os.environ:
diff --git a/tests/unit/test_validators.py b/tests/unit/test_validators.py
index 6a78c52d..4abf7c48 100644
--- a/tests/unit/test_validators.py
+++ b/tests/unit/test_validators.py
@@ -11,12 +11,12 @@
 import pytest
 import typer
 
+from madengine.cli.constants import ExitCode
+from madengine.cli.validators import validate_additional_context
 from madengine.core.additional_context_defaults import (
     DEFAULT_GPU_VENDOR,
     DEFAULT_GUEST_OS,
 )
-from madengine.cli.validators import validate_additional_context
-from madengine.cli.constants import ExitCode
 
 
 class TestValidateAdditionalContext: