diff --git a/benchmark_reporting_tools/post_results.py b/benchmark_reporting_tools/post_results.py
index 318466fc..178fc0c7 100644
--- a/benchmark_reporting_tools/post_results.py
+++ b/benchmark_reporting_tools/post_results.py
@@ -93,10 +93,12 @@ class BenchmarkMetadata:
     kind: str | None = None
     execution_number: int = 1
     worker_count: int | None = None
+    node_count: int | None = None
     scale_factor: int | None = None
     gpu_count: int | None = None
     num_drivers: int | None = None
     gpu_name: str | None = None
+    image_digest: str | None = None
 
     @classmethod
     def from_parsed(cls, raw: dict) -> "BenchmarkMetadata":
@@ -259,8 +261,9 @@ def _parse_args() -> argparse.Namespace:
     )
     parser.add_argument(
         "--identifier-hash",
-        help="Unique identifier hash for software environment (e.g. a container image digest).",
-        required=True,
+        default=None,
+        help="Unique identifier hash for software environment (e.g. a container image digest). "
+        "If omitted, the image_digest from benchmark_result.json context is used.",
     )
     parser.add_argument(
         "--version",
@@ -299,6 +302,26 @@ def _parse_args() -> argparse.Namespace:
         help="Benchmark definition name",
         required=True,
     )
+    parser.add_argument(
+        "--velox-branch",
+        default=None,
+        help="Velox branch used to build the worker image.",
+    )
+    parser.add_argument(
+        "--velox-repo",
+        default=None,
+        help="Velox repository used to build the worker image.",
+    )
+    parser.add_argument(
+        "--presto-branch",
+        default=None,
+        help="Presto branch used to build the worker image.",
+    )
+    parser.add_argument(
+        "--presto-repo",
+        default=None,
+        help="Presto repository used to build the worker image.",
+    )
     parser.add_argument(
         "--concurrency-streams",
         help="Number of concurrency streams to use for the benchmark run",
@@ -355,6 +378,10 @@ def _build_submission_payload(
     is_official: bool,
     asset_ids: list[int] | None = None,
     concurrency_streams: int = 1,
+    velox_branch: str | None = None,
+    velox_repo: str | None = None,
+    presto_branch: str | None = None,
+    presto_repo: str | None = None,
 ) -> dict:
     """Build a BenchmarkSubmission payload from parsed dataclasses.
 
@@ -449,6 +476,16 @@ def _query_sort_key(name: str):
         if v is not None
     }
 
+    engine_config_payload = engine_config.serialize() if engine_config else {}
+    if velox_branch or velox_repo or presto_branch or presto_repo:
+        engine_config_payload = {
+            **engine_config_payload,
+            "velox_branch": velox_branch,
+            "velox_repo": velox_repo,
+            "presto_branch": presto_branch,
+            "presto_repo": presto_repo,
+        }
+
     return {
         "sku_name": sku_name,
         "storage_configuration_name": storage_configuration_name,
@@ -461,11 +498,11 @@ def _query_sort_key(name: str):
             "commit_hash": commit_hash,
         },
         "run_at": benchmark_metadata.timestamp.isoformat(),
-        "node_count": 1,
+        "node_count": benchmark_metadata.node_count or 1,
         "gpu_count": benchmark_metadata.gpu_count or 0,
         "query_logs": query_logs,
         "concurrency_streams": concurrency_streams,
-        "engine_config": engine_config.serialize() if engine_config else {},
+        "engine_config": engine_config_payload,
         "extra_info": extra_info,
         "is_official": is_official,
         "asset_ids": asset_ids,
@@ -550,7 +587,7 @@ async def _process_benchmark_dir(
     storage_configuration_name: str,
     cache_state: str,
     engine_name: str | None,
-    identifier_hash: str,
+    identifier_hash: str | None,
     version: str | None,
     commit_hash: str | None,
     is_official: bool,
@@ -563,6 +600,10 @@ async def _process_benchmark_dir(
     concurrency_streams: int = 1,
     config_dir: Path | None = None,
     logs_dir: Path | None = None,
+    velox_branch: str | None = None,
+    velox_repo: str | None = None,
+    presto_branch: str | None = None,
+    presto_repo: str | None = None,
 ) -> int:
     """Process a benchmark directory and post results to API.
 
@@ -589,6 +630,18 @@ async def _process_benchmark_dir(
         print(f"  Error loading metadata: {e}", file=sys.stderr)
         return 1
 
+    # Fall back to the container image_digest captured in the benchmark
+    # results context when no explicit identifier_hash was provided on the CLI.
+    if identifier_hash is None:
+        identifier_hash = benchmark_metadata.image_digest
+    if identifier_hash is None:
+        print(
+            "  Error: --identifier-hash was not provided and benchmark_result.json "
+            "context has no image_digest to fall back to.",
+            file=sys.stderr,
+        )
+        return 1
+
     # Resolve config directory: explicit override → auto-detect from variant
     effective_config_dir = config_dir
     variant = _ENGINE_TO_VARIANT.get(benchmark_metadata.engine)
@@ -669,6 +722,10 @@ async def _process_benchmark_dir(
                 is_official=is_official,
                 asset_ids=asset_ids,
                 concurrency_streams=concurrency_streams,
+                velox_branch=velox_branch,
+                velox_repo=velox_repo,
+                presto_branch=presto_branch,
+                presto_repo=presto_repo,
             )
         except Exception as e:
             print(f"  Error building payload for '{bench_name}': {e}", file=sys.stderr)
@@ -747,6 +804,10 @@ async def main() -> int:
         concurrency_streams=args.concurrency_streams,
         config_dir=Path(args.config_dir) if args.config_dir else None,
         logs_dir=Path(args.logs_dir) if args.logs_dir else None,
+        velox_branch=args.velox_branch,
+        velox_repo=args.velox_repo,
+        presto_branch=args.presto_branch,
+        presto_repo=args.presto_repo,
     )
 
     return result
diff --git a/presto/docker/config/template/etc_coordinator/config_native.properties b/presto/docker/config/template/etc_coordinator/config_native.properties
index 3e6ad0dc..dfd73fc4 100644
--- a/presto/docker/config/template/etc_coordinator/config_native.properties
+++ b/presto/docker/config/template/etc_coordinator/config_native.properties
@@ -58,7 +58,7 @@ query.execution-policy=phased
 # Kill queries based on total reservation on blocked nodes to recover memory.
 query.low-memory-killer.policy=total-reservation-on-blocked-nodes
 # Upper limit on query wall time to keep tests bounded.
-query.max-execution-time=10m
+query.max-execution-time=60m
 # Keep metadata of up to 1000 queries for UI and debugging.
 query.max-history=1000
 # Memory quotas per node and cluster to protect stability.
diff --git a/presto/scripts/generate_presto_config.sh b/presto/scripts/generate_presto_config.sh
index 611b4393..25f39cb7 100755
--- a/presto/scripts/generate_presto_config.sh
+++ b/presto/scripts/generate_presto_config.sh
@@ -135,7 +135,10 @@ EOF
   fi
 
   if [[ "${VARIANT_TYPE}" == "cpu" ]]; then
-    echo "cluster-tag=native-cpu" >>${COORD_CONFIG}
+    echo "cluster-tag=native-cpu" >> ${COORD_CONFIG}
+    # cuDF has no effect in CPU mode but leaving cudf.enabled=true in the worker
+    # config causes noisy startup warnings; force it off for CPU runs.
+    sed -i 's/^cudf\.enabled=true/cudf.enabled=false/' ${WORKER_CONFIG}
   fi
 
   # for Java variant, disable some Parquet properties which are now rejected
@@ -162,7 +165,7 @@ fi
 
 # We want to propagate any changes from the original worker config to the new worker configs even if
 # we did not re-generate the configs.
-if [[ -n "$NUM_WORKERS" && "$VARIANT_TYPE" == "gpu" ]]; then
+if [[ -n "$NUM_WORKERS" ]]; then
   if [[ -n ${GPU_IDS:-} ]]; then
     WORKER_IDS=($(echo "$GPU_IDS" | tr ',' ' '))
   else
diff --git a/presto/slurm/presto-nvl72/README.md b/presto/slurm/presto-nvl72/README.md
index 007f3bcd..1d71508f 100644
--- a/presto/slurm/presto-nvl72/README.md
+++ b/presto/slurm/presto-nvl72/README.md
@@ -6,72 +6,124 @@ This directory contains scripts for running Presto TPC-H benchmarks on CoreWeave
 
 ```
 presto-nvl72/
-├── run-presto-benchmarks.slurm  # Main slurm job script with configuration
-├── run-presto-benchmarks.sh     # Execution script
-├── launch-run.sh                # Convenience launcher
-├── functions.sh                 # Presto helper functions
-├── echo_helpers.sh              # Logging helpers
-├── logs/                        # Execution logs
-└── result_dir/                  # Benchmark results
+├── defaults.env                     # Cluster-specific path defaults (override via env)
+├── functions.sh                     # Presto helper functions
+├── echo_helpers.sh                  # Logging helpers
+├── enroot-decompress.sh             # Auto-detecting decompressor for enroot image pulls
+│
+├── pull_ghcr_image.sh               # Pull a GHCR image and save as .sqsh
+│
+├── launch-run.sh                    # Submit a benchmark run job
+├── run-presto-benchmarks.slurm      # SLURM job script for benchmarks
+├── run-presto-benchmarks.sh         # Benchmark execution logic
+│
+├── launch-analyze-tables.sh         # Submit an analyze-tables job
+├── run-analyze-tables.slurm         # SLURM job script for ANALYZE TABLE
+├── run-analyze-tables.sh            # Analyze-tables execution logic
+│
+├── launch-gen-data.sh               # Submit a TPC-H data generation job
+├── gen-tpch-data.slurm              # SLURM job script for data generation
+│
+├── run-sweep.sh                     # Run benchmark + post results for a sweep of configs
+├── run_interactive.sh               # Start an interactive Presto session
+│
+├── logs/                            # Execution logs
+└── result_dir/                      # Benchmark results
 ```
 
 ## Quick Start
 
-### Running the benchmark via launcher (recommended)
+### 1. Pull container images
+
+Images must be pre-pulled as `.sqsh` files before running benchmarks:
 
 ```bash
 cd presto/slurm/presto-nvl72
-./launch-run.sh -n <nodes> -s <scale_factor> [-i <iterations>] [additional sbatch options]
+./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:<coordinator-tag>
+./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:<worker-tag>
+```
+
+Images are saved to `${IMAGE_DIR}` (default: `/scratch/${USER}/images/presto`).
+
+### 2. Generate TPC-H data (one-time per scale factor)
+
+```bash
+# Pull the tpchgen-cli image first
+./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:tpchgen-cli
+
+./launch-gen-data.sh -s <scale_factor> -o <output_dir>
+```
+
+### 3. Analyze tables (one-time per scale factor / image version)
+
+```bash
+./launch-analyze-tables.sh -s <scale_factor> -n <nodes> \
+    -w <worker-image-name> -c <coord-image-name>
+```
+
+### 4. Run benchmarks
+
+```bash
+./launch-run.sh -n <nodes> -s <scale_factor> \
+    -w <worker-image-name> -c <coord-image-name> \
+    [-i <iterations>]
 
 # examples
-./launch-run.sh -n 8 -s 3000
-./launch-run.sh -n 4 -s 10000 -i 3 --partition gpu --account myacct
+./launch-run.sh -n 8 -s 3000 \
+    -w presto-native-worker-gpu-v1 -c presto-coordinator-v1
+
+./launch-run.sh -n 4 -s 10000 -i 3 \
+    -w presto-native-worker-gpu-v1 -c presto-coordinator-v1 \
+    --partition gpu --account myacct
 ```
 
 The launcher:
-- requires node count (-n/--nodes) and scale factor (-s/--scale-factor)
-- accepts optional iterations (-i/--iterations, default 1)
-- embeds nodes/SF/iterations in .out/.err filenames
-- prints the first node’s hostname/IP when allocated and a ready-to-run SSH port-forward command to access the Presto Web UI on your machine (http://localhost:9200)
+- requires node count (`-n/--nodes`), scale factor (`-s/--scale-factor`), worker image (`-w/--worker-image`), and coordinator image (`-c/--coord-image`)
+- accepts optional iterations (`-i/--iterations`, default 2)
+- embeds nodes/SF/iterations in `.out`/`.err` filenames
+- prints a ready-to-run SSH port-forward command to access the Presto Web UI at http://localhost:9200
 
-### Submitting directly (advanced)
+### 5. Run a sweep
 
 ```bash
-export SCALE_FACTOR=3000
-export NUM_ITERATIONS=1
-sbatch --nodes 8 \
-  --output "presto-tpch-run_n8_sf3000_i1_%j.out" \
-  --error  "presto-tpch-run_n8_sf3000_i1_%j.err" \
-  --export "ALL,SCALE_FACTOR=${SCALE_FACTOR},NUM_ITERATIONS=${NUM_ITERATIONS}" \
-  run-presto-benchmarks.slurm
+./run-sweep.sh \
+    --sku-name raplab-gb200-nvl72 \
+    --storage-configuration-name <storage-config-name> \
+    --velox-branch <branch> \
+    --presto-branch <branch> \
+    --velox-repo <url> \
+    --presto-repo <url> \
+    [-n "8 4"] \
+    [-s "3000 10000"] \
+    [-i <iterations>]
 ```
 
-## Configuration
+`--cache-state` is derived automatically: `lukewarm` for 1 iteration, `warm` for 2+. Pass `--cache-state` explicitly to override.
 
-Primary configuration is passed via the launcher flags and environment. The `.slurm` script validates that required variables are set.
+## Configuration
 
-Key variables:
+### Key variables (set via launcher flags or environment)
 
-- SCALE_FACTOR: required (provided via `-s/--scale-factor`)
-- NUM_ITERATIONS: required by the job; launcher defaults to 1 (`-i/--iterations` to override)
-- NUM_NODES: derived from Slurm allocation; provided via `-n/--nodes` to launcher
-- REPO_ROOT: auto-detected from script location
-- LOGS_DIR: `${SCRIPT_DIR}/logs` by default (log files are timestamped; old logs archived to `logs/archive/`)
-- IMAGE_DIR, DATA, CONFIGS: see below or override via environment if needed
+| Variable | Source | Description |
+|---|---|---|
+| `SCALE_FACTOR` | `-s/--scale-factor` | TPC-H scale factor (required) |
+| `NODES_COUNT` | `-n/--nodes` | Number of SLURM nodes (required) |
+| `WORKER_IMAGE` | `-w/--worker-image` | Worker image name, without `.sqsh` (required) |
+| `COORD_IMAGE` | `-c/--coord-image` | Coordinator image name, without `.sqsh` (required) |
+| `NUM_ITERATIONS` | `-i/--iterations` | Benchmark iterations (default: 2) |
+| `NUM_GPUS_PER_NODE` | `-g/--num-gpus-per-node` | GPUs per node (default: 4) |
 
-Other defaults:
-- WORKER_IMAGE: `presto-native-worker-gpu`
-- NUM_GPUS_PER_NODE: `4`
-- DATA: `/mnt/data/tpch-rs`
-- IMAGE_DIR: `/mnt/data/images/presto`
-- CONFIGS: `${REPO_ROOT}/presto/docker/config/generated/gpu`
+### Path defaults (`defaults.env`)
 
-### SBATCH Directives
+Override any of these by exporting before running:
 
-- **Time limit**: 1 hour (adjust `--time` if needed)
-- **Node allocation**: Full node (144 CPUs, 4 GPUs, exclusive)
-- **Memory**: All available (`--mem=0`)
-- `--nodes`, `--output`, and `--error` are passed by the launcher instead of being embedded in the `.slurm` file.
+| Variable | Default | Description |
+|---|---|---|
+| `DATA` | `/scratch/${USER}/tpch-rs-float-no-delta` | TPC-H parquet dataset root |
+| `IMAGE_DIR` | `/scratch/${USER}/images/presto` | Directory containing `.sqsh` image files |
+| `RESULTS_BASE` | `${HOME}/${VT_WORKSPACE}/results` | Benchmark result output root |
+| `HIVE_METASTORE_SHARED_ROOT` | `/scratch/${USER}/shared_hive_metadata` | Shared pre-analyzed metastore snapshots |
+| `HIVE_METASTORE_VERSION` | `HIVE-METASTORE-20260419-no-delta` | Metastore snapshot version tag |
 
 ## Monitoring
 
@@ -80,21 +132,18 @@ Other defaults:
 squeue -u $USER
 
 # Monitor job output
-tail -f presto-tpch-run_n<NODES>_sf<SCALE_FACTOR>_i<ITER>_<JOB_ID>.out
+tail -f presto-tpch-run_n<NODES>_sf<SF>_i<ITER>_<JOB_ID>.out
 
-# Check logs during execution (filenames include a run timestamp)
-tail -f logs/coord_*.log
-tail -f logs/cli_*.log
-tail -f logs/worker_0_*.log
+# Check logs during execution
+tail -f logs/coord.log
+tail -f logs/cli.log
+tail -f logs/worker_0.log
 ```
 
-## Coordinator IP and Web UI
+## Coordinator Web UI
 
-After submission, the launcher waits until nodes are allocated, then prints:
-- the first node’s hostname/IP
-- an SSH port-forward command you can run locally to access the Presto Web UI
-
-Example output snippet:
+After submission, the launcher waits until nodes are allocated, then prints an SSH
+port-forward command you can run locally:
 
 ```text
 Run this command on a machine to get access to the webUI:
@@ -102,45 +151,65 @@ Run this command on a machine to get access to the webUI:
 The UI will be available at http://localhost:9200
 ```
 
-## Results
+## Reusing an analyzed Hive metastore across runs
+
+Running `ANALYZE TABLE` from scratch on every clone is expensive. The launchers
+publish and consume pre-analyzed metastore snapshots keyed by version string and
+scale factor.
+
+Two env vars control sharing (defined in `defaults.env`):
 
-Results are saved to:
-- **Logs**: `logs/` directory
-- **CSV Summary**: `result_dir/summary.csv`
-- **Historical Results**: `${REPO_ROOT}/benchmark-storage/YYYY/MM/DD/`
+- `HIVE_METASTORE_SHARED_ROOT` — directory on a cluster-visible filesystem where
+  snapshots live.
+- `HIVE_METASTORE_VERSION` — version tag. Bump it when the worker image or parquet
+  data format changes so stale snapshots don't leak into runs against a newer image.
+  Export it as an empty string to disable sharing entirely.
 
-## Prerequisites
+Layout: `$HIVE_METASTORE_SHARED_ROOT/$HIVE_METASTORE_VERSION/tpchsf<SF>/…`
 
-1. **Container images** must exist in `${IMAGE_DIR}`:
-   - `presto-coordinator.sqsh`
-   - `presto-native-worker-gpu.sqsh` or `presto-native-worker-cpu.sqsh`
+### Consuming — the default path
 
-2. **Data directory** must be accessible at `${DATA}` (will be mounted in containers)
+With the defaults from `defaults.env`, a benchmark run picks up the shared snapshot automatically:
 
-3. **velox-testing repo** will be auto-cloned to `${REPO_ROOT}/velox-testing` if not present
+```bash
+./launch-run.sh -n 2 -s 3000 -i 1 -w <worker-image> -c <coord-image>
+```
+
+`setup` in the SLURM job populates `.hive_metastore/tpchsf<SF>/` from the shared
+snapshot when the local copy is absent. If neither local nor shared is available the
+run fails fast with a message pointing at `launch-analyze-tables.sh`.
+
+### Publishing — run once per (version, SF) to seed a new slot
+
+```bash
+export HIVE_METASTORE_VERSION=HIVE-METASTORE-20260419-no-delta   # or a new tag
+./launch-analyze-tables.sh -s <SF> -n <nodes> -w <worker-image> -c <coord-image>
+# On success, if the target slot is empty it gets populated atomically.
+# Subsequent analyze runs with the same (version, SF) skip the publish.
+```
+
+### Disabling sharing (fall back to per-clone analyze)
+
+```bash
+export HIVE_METASTORE_VERSION=   # empty value disables sharing
+./launch-analyze-tables.sh -s <SF> -n <nodes> -w <worker-image> -c <coord-image>
+./launch-run.sh -n <nodes> -s <SF> -i <iters> -w <worker-image> -c <coord-image>
+```
 
 ## Troubleshooting
 
 ### Coordinator fails to start
-Check coordinator logs:
 ```bash
-cat logs/coord_*.log
+cat logs/coord.log
 ```
 
 ### Workers not registering
-Check worker logs:
 ```bash
 cat logs/worker_*.log
 ```
 
 ### Image not found
-Verify images exist:
-```bash
-ls -lh /mnt/data/images/presto/*.sqsh
-```
-
-### Data directory issues
-Verify data path is accessible:
+Pull the image first:
 ```bash
-ls -la /mnt/data/tpch-presto
+./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:<tag>
 ```
diff --git a/presto/slurm/presto-nvl72/defaults.env b/presto/slurm/presto-nvl72/defaults.env
new file mode 100644
index 00000000..adfc7a93
--- /dev/null
+++ b/presto/slurm/presto-nvl72/defaults.env
@@ -0,0 +1,35 @@
+#!/bin/bash
+# ==============================================================================
+# Cluster & path defaults for presto-nvl72 benchmark scripts.
+# ==============================================================================
+# Defaults use the := pattern (applied when unset or empty): export a variable
+# before sourcing this file (or running a script that sources it) to override one.
+#
+# Multiple checkouts of velox-testing are supported — each checkout gets its own
+# scratch/output namespace via VT_WORKSPACE, derived from the repo path.
+# Override VT_WORKSPACE to use a custom prefix instead.
+# ==============================================================================
+
+# --- Workspace identifier (per-clone isolation) ---
+_vt_path="${VT_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." 2>/dev/null && pwd)}"
+: "${VT_WORKSPACE:=$(echo "${_vt_path#${HOME}/}" | tr '/' '-')}"
+unset _vt_path
+
+# --- Scratch-based paths ---
+# DATA points at the canonical TPC-H parquet tree on this cluster.
+# Override DATA in the env to point at a different layout.
+: "${DATA:=/scratch/${USER}/tpch-rs-float-no-delta}"
+: "${IMAGE_DIR:=/scratch/${USER}/images/presto}"
+: "${EXPECTED_RESULTS_BASE:=/scratch/${USER}/tpch-rs-no-delta-expected}"
+
+# --- Output paths ---
+: "${RESULTS_BASE:=${HOME}/${VT_WORKSPACE}/results}"
+
+# --- Shared Hive metastore ---
+# Pre-analyzed metastores are published here by launch-analyze-tables.sh and
+# consumed by launch-run.sh, keyed by HIVE_METASTORE_VERSION + scale factor.
+# The default version is the currently published snapshot (SFs 1000/3000/10000/
+# 30000 against DATA above); bump it when the worker image or parquet encoding
+# changes.  It uses '=' (not ':=') so exporting it empty disables sharing.
+: "${HIVE_METASTORE_SHARED_ROOT:=/scratch/${USER}/shared_hive_metadata}"
+: "${HIVE_METASTORE_VERSION=HIVE-METASTORE-20260419-no-delta}"
diff --git a/presto/slurm/presto-nvl72/enroot-decompress.sh b/presto/slurm/presto-nvl72/enroot-decompress.sh
new file mode 100755
index 00000000..8cac4212
--- /dev/null
+++ b/presto/slurm/presto-nvl72/enroot-decompress.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Format-sniffing stream decompressor for enroot image layer downloads.
+# Set ENROOT_GZIP_PROGRAM to this script to handle gzip and OCI tar+zstd layers.
+# enroot invokes it as: enroot-decompress.sh -d -f -c  (arguments are ignored)
+
+header_file=$(mktemp)
+trap 'rm -f "$header_file"' EXIT
+
+# Replays the captured header bytes, then forwards whatever is left on stdin.
+replay_stream() { cat "$header_file" -; }
+
+# Pull the first 4 bytes off stdin one byte at a time, leaving the remainder of
+# the stream unread, and render them as a contiguous hex string.
+dd bs=1 count=4 2>/dev/null > "$header_file"
+signature=$(od -A n -N 4 -t x1 "$header_file" | tr -d ' \n')
+
+# Dispatch on the leading magic bytes.
+if [[ "$signature" == 1f8b* ]]; then
+    # gzip magic number
+    replay_stream | gzip -d -f -c
+elif [[ "$signature" == 28b52ffd* ]]; then
+    # zstd magic number (0xFD2FB528, stored little-endian: 28 b5 2f fd)
+    replay_stream | zstd -d -f -c
+else
+    # Neither signature matched: forward the stream untouched.
+    replay_stream
+fi
diff --git a/presto/slurm/presto-nvl72/functions.sh b/presto/slurm/presto-nvl72/functions.sh
index 19605321..9a66009c 100755
--- a/presto/slurm/presto-nvl72/functions.sh
+++ b/presto/slurm/presto-nvl72/functions.sh
@@ -4,27 +4,33 @@
 
 # Validates job preconditions and assigns default values for presto execution.
 function setup {
-    [ -z "$SLURM_JOB_NAME" ] && echo "required argument '--job-name' not specified" && exit 1
-    [ -z "$SLURM_JOB_ACCOUNT" ] && echo "required argument '--account' not specified" && exit 1
-    [ -z "$SLURM_JOB_PARTITION" ] && echo "required argument '--partition' not specified" && exit 1
-    [ -z "$SLURM_NNODES" ] && echo "required argument '--nodes' not specified" && exit 1
+    [ -z "${SLURM_JOB_NAME:-}" ] && echo "required argument '--job-name' not specified" && exit 1
+    [ -z "${SLURM_JOB_ACCOUNT:-}" ] && echo "warning: '--account' not specified"
+    [ -z "${SLURM_JOB_PARTITION:-}" ] && echo "warning: '--partition' not specified"
+    [ -z "${SLURM_NNODES:-}" ] && echo "required argument '--nodes' not specified" && exit 1
     [ -z "$IMAGE_DIR" ] && echo "IMAGE_DIR must be set" && exit 1
-    [ -z "$LOGS_DIR" ] && echo "LOGS_DIR must be set" && exit 1
-    [ -z "$SERVER_START_TIMESTAMP" ] && echo "SERVER_START_TIMESTAMP must be set" && exit 1
+    [ -z "$LOGS" ] && echo "LOGS must be set" && exit 1
     [ -z "$CONFIGS" ] && echo "CONFIGS must be set" && exit 1
     [ -z "$NUM_NODES" ] && echo "NUM_NODES must be set" && exit 1
     [ -z "$NUM_GPUS_PER_NODE" ] && echo "NUM_GPUS_PER_NODE env variable must be set" && exit 1
     [ ! -d "$VT_ROOT" ] && echo "VT_ROOT must be a valid directory" && exit 1
     [ ! -d "$DATA" ] && echo "DATA must be a valid directory" && exit 1
 
-    if [ ! -d ${VT_ROOT}/.hive_metastore ]; then
-        echo "Copying hive metastore from data source."
-        copy_hive_metastore
-    else
-        echo "Hive metastore already exists.  Reusing."
+    # If sharing is opted in (HIVE_METASTORE_VERSION set) and the local
+    # snapshot is missing, try to populate it from the shared location.
+    if [[ -n "${HIVE_METASTORE_VERSION:-}" && ! -d "${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR}" ]]; then
+        populate_hive_metastore_from_shared
     fi
 
-    [ ! -d ${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR} ] && echo "Schema for SF ${SCALE_FACTOR} does not exist in hive metastore." && exit 1
+    if [ ! -d "${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR}" ]; then
+        echo "Schema for SF ${SCALE_FACTOR} is not present in ${VT_ROOT}/.hive_metastore."
+        if [[ -n "${HIVE_METASTORE_VERSION:-}" ]]; then
+            echo "Shared slot $(shared_metastore_slot) is also empty; publish one by running launch-analyze-tables.sh with the same HIVE_METASTORE_VERSION set."
+        else
+            echo "Run launch-analyze-tables.sh -s ${SCALE_FACTOR} first, or set HIVE_METASTORE_VERSION to consume a pre-published snapshot."
+        fi
+        exit 1
+    fi
 
     generate_configs
 
@@ -59,46 +65,66 @@ function validate_environment_preconditions {
 # Execute script through the coordinator image (used for coordinator and cli executables)
 function run_coord_image {
     [ $# -ne 2 ] && echo_error "$0 expected one argument for '<script>' and one for '<coord/cli>'"
-    validate_environment_preconditions LOGS_DIR CONFIGS VT_ROOT COORD DATA COORD_IMAGE SERVER_START_TIMESTAMP
+    validate_environment_preconditions LOGS CONFIGS VT_ROOT COORD DATA COORD_IMAGE
     local script=$1
     local type=$2
     [ "$type" != "coord" ] && [ "$type" != "cli" ] && echo_error "coord type must be coord/cli"
-    local log_file="${type}_${SERVER_START_TIMESTAMP}.log"
+    local log_file="${type}.log"
 
     local coord_image="${IMAGE_DIR}/${COORD_IMAGE}.sqsh"
     [ ! -f "${coord_image}" ] && echo_error "coord image does not exist at ${coord_image}"
 
+    # Provide a writable base data directory for the coordinator so that the
+    # Presto launcher can create /var/lib/presto/data/var (PID file, etc.).
+    # Workers do the same via worker_data_N; without this mount the squash
+    # image filesystem is read-only and the launcher fails with EROFS.
+    local coord_data="${SCRIPT_DIR}/coord_data"
+    mkdir -p "${coord_data}"
+
+    # Miniforge is installed at ${VT_ROOT}/miniforge3. Its conda/python scripts
+    # have shebangs hardcoded to the host-absolute install path. We bind-mount
+    # miniforge at that same absolute path inside the container so shebangs
+    # resolve correctly regardless of where /workspace points.
+    local miniforge_dir="${VT_ROOT}/miniforge3"
+    local extra_mounts=""
+    if [ -d "${miniforge_dir}" ]; then
+        extra_mounts=",${miniforge_dir}:${miniforge_dir}"
+    fi
+    if [ -d "/scratch" ]; then
+        extra_mounts="${extra_mounts},/scratch:/scratch"
+    fi
+
     # Coordinator runs as a background process, whereas we want to wait for cli
     # so that the job will finish when the cli is done (terminating background
     # processes like the coordinator and workers).
     if [ "${type}" == "coord" ]; then
         srun -w $COORD --ntasks=1 --overlap \
 --container-image=${coord_image} \
---export=ALL,JAVA_HOME=/usr/lib/jvm/jre-17-openjdk \
---container-env=JAVA_HOME=/usr/lib/jvm/jre-17-openjdk \
---container-env=PATH=/usr/lib/jvm/jre-17-openjdk/bin:$PATH \
+--container-remap-root \
+--export=ALL \
 --container-mounts=${VT_ROOT}:/workspace,\
+${coord_data}:/var/lib/presto/data,\
 ${CONFIGS}/etc_common:/opt/presto-server/etc,\
 ${CONFIGS}/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties,\
 ${CONFIGS}/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties,\
 ${CONFIGS}/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties,\
 ${DATA}:/var/lib/presto/data/hive/data/user_data,\
-${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore \
--- bash -lc "unset JAVA_HOME; export JAVA_HOME=/usr/lib/jvm/jre-17-openjdk; export PATH=/usr/lib/jvm/jre-17-openjdk/bin:\$PATH; ${script}" >> ${LOGS_DIR}/${log_file} 2>&1 &
+${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore${extra_mounts} \
+-- bash -lc "unset JAVA_HOME; export JAVA_HOME=/usr/lib/jvm/jre-17-openjdk; export PATH=/usr/lib/jvm/jre-17-openjdk/bin:\$PATH; ${script}" >> ${LOGS}/${log_file} 2>&1 &
     else
         srun -w $COORD --ntasks=1 --overlap \
+--container-remap-root \
 --container-image=${coord_image} \
---export=ALL,JAVA_HOME=/usr/lib/jvm/jre-17-openjdk \
---container-env=JAVA_HOME=/usr/lib/jvm/jre-17-openjdk \
---container-env=PATH=/usr/lib/jvm/jre-17-openjdk/bin:$PATH \
+--export=ALL \
 --container-mounts=${VT_ROOT}:/workspace,\
+${coord_data}:/var/lib/presto/data,\
 ${CONFIGS}/etc_common:/opt/presto-server/etc,\
 ${CONFIGS}/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties,\
 ${CONFIGS}/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties,\
 ${CONFIGS}/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties,\
 ${DATA}:/var/lib/presto/data/hive/data/user_data,\
-${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore \
--- bash -lc "unset JAVA_HOME; export JAVA_HOME=/usr/lib/jvm/jre-17-openjdk; export PATH=/usr/lib/jvm/jre-17-openjdk/bin:\$PATH; ${script}" >> ${LOGS_DIR}/${log_file} 2>&1
+${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore${extra_mounts} \
+-- bash -lc "unset JAVA_HOME; export JAVA_HOME=/usr/lib/jvm/jre-17-openjdk; export PATH=/usr/lib/jvm/jre-17-openjdk/bin:\$PATH; ${script}" >> ${LOGS}/${log_file} 2>&1
     fi
 }
 
@@ -152,10 +178,15 @@ run_coord_image "$COORD_SCRIPT" "coord"
 # Runs a worker on a given node with custom configuration files which are generated as necessary.
 function run_worker {
     [ $# -ne 4 ] && echo_error "$0 expected arguments 'gpu_id', 'image', 'node_id', and 'worker_id'"
-    validate_environment_preconditions LOGS_DIR CONFIGS VT_ROOT COORD CUDF_LIB DATA
+    validate_environment_preconditions LOGS CONFIGS VT_ROOT COORD CUDF_LIB DATA
 
     local gpu_id=$1 image=$2 node=$3 worker_id=$4
-    echo "running worker ${worker_id} with image ${image} on node ${node} with gpu_id ${gpu_id}"
+    # GB200 NVL72 compute tray: 2 Grace CPUs x 2 Blackwell GPUs per CPU.
+    # CPU NUMA 0 = cores 0-71 (Grace 0, GPUs 0-1).
+    # CPU NUMA 1 = cores 72-143 (Grace 1, GPUs 2-3).
+    # Pairs each worker with the CPU socket its GPU is attached to over NVLink-C2C.
+    local numa_node=$((gpu_id / 2))
+    echo "running worker ${worker_id} with image ${image} on node ${node} with gpu_id ${gpu_id} numa_node ${numa_node}"
 
     local worker_image="${IMAGE_DIR}/${image}.sqsh"
     [ ! -f "${worker_image}" ] && echo_error "worker image does not exist at ${worker_image}"
@@ -174,15 +205,30 @@ function run_worker {
     mkdir -p ${worker_data}/hive/data/user_data
     mkdir -p ${VT_ROOT}/.hive_metastore
 
-    # Need to fix this to run with cpu nodes as well.
-    # Run the worker with the new configs.
-    # Use --overlap to allow multiple srun commands from same job
-    # Don't use --gres=gpu:1 here since the job already allocated GPUs
-    # Set CUDA_VISIBLE_DEVICES explicitly in bash command to override SLURM default
+    # To re-enable verbose GLOG logging, add these flags to the srun call below
+    # (note: move them inside -- bash -c "..." as exports, not --container-env,
+    # since pyxis ignores key=value in --container-env):
+    #   GLOG_vmodule=IntraNodeTransferRegistry=3,ExchangeOperator=3
+    #   GLOG_logtostderr=1
+    # Warning: GLOG_logtostderr=1 generates very large logs that can fill the disk.
+
+    # The parent SLURM job allocates --gres=gpu:NUM_GPUS_PER_NODE so all GPU kernel
+    # capabilities are already set up for the job cgroup.  Do NOT use --gres=gpu:1
+    # on the step: it restricts the step's cgroup to one GPU and then nvidia-container-cli
+    # rejects NVIDIA_VISIBLE_DEVICES values for other GPUs as "unknown device".
+    #
+    # NVIDIA_VISIBLE_DEVICES=all triggers the enroot 98-nvidia.sh hook which calls
+    # nvidia-container-cli configure --device=all --compute.  This mounts all GPU
+    # devices and all required host driver libraries (580.105.08: libcuda, libnvidia-
+    # gpucomp, libnvidia-nvvm, libnvidia-ptxjitcompiler, libnvidia-ml, etc.) and runs
+    # ldconfig inside the container.  The manual libcuda bind-mount then overrides the
+    # compat library with the host driver so cudaMallocAsync works.
+    # CUDA_VISIBLE_DEVICES=${gpu_id} inside the container restricts each worker to
+    # its assigned GPU while still allowing the CUDA driver to enumerate all devices.
     srun -N1 -w $node --ntasks=1 --overlap \
 --container-image=${worker_image} \
---export=ALL \
---container-env=LD_LIBRARY_PATH="/usr/lib64/presto-native-libs:/usr/local/lib:/usr/lib64" \
+--container-remap-root \
+--export=ALL,NVIDIA_VISIBLE_DEVICES=all,NVIDIA_DRIVER_CAPABILITIES=compute,utility \
 --container-mounts=${VT_ROOT}:/workspace,\
 ${CONFIGS}/etc_common:/opt/presto-server/etc,\
 ${worker_node}:/opt/presto-server/etc/node.properties,\
@@ -190,29 +236,134 @@ ${worker_config}:/opt/presto-server/etc/config.properties,\
 ${worker_hive}:/opt/presto-server/etc/catalog/hive.properties,\
 ${worker_data}:/var/lib/presto/data,\
 ${DATA}:/var/lib/presto/data/hive/data/user_data,\
-${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore \
---container-env=LD_LIBRARY_PATH="$CUDF_LIB:$LD_LIBRARY_PATH" \
---container-env=GLOG_vmodule=IntraNodeTransferRegistry=3,ExchangeOperator=3 \
---container-env=GLOG_logtostderr=1 \
--- /bin/bash -c "export CUDA_VISIBLE_DEVICES=${gpu_id}; echo \"CUDA_VISIBLE_DEVICES=\$CUDA_VISIBLE_DEVICES\"; echo \"--- Environment Variables ---\"; set | grep -E 'UCX_|CUDA_VISIBLE_DEVICES'; echo \"GPU Name: \$(nvidia-smi --query-gpu=name --format=csv,noheader | head -n 1)\"; /usr/bin/presto_server --etc-dir=/opt/presto-server/etc" > ${LOGS_DIR}/worker_${worker_id}_${SERVER_START_TIMESTAMP}.log 2>&1 &
+${VT_ROOT}/.hive_metastore:/var/lib/presto/data/hive/metastore,\
+/usr/lib/aarch64-linux-gnu/libcuda.so.580.105.08:/usr/local/cuda-13.0/compat/libcuda.so.1,\
+/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.580.105.08:/usr/local/lib/libnvidia-ml.so.1 \
+-- /bin/bash -c "
+export LD_LIBRARY_PATH='${CUDF_LIB}':/usr/local/lib:\${LD_LIBRARY_PATH:-}
+if [[ '${VARIANT_TYPE}' == 'gpu' ]]; then export CUDA_VISIBLE_DEVICES=${gpu_id}; fi
+echo \"Worker ${worker_id}: CUDA_VISIBLE_DEVICES=\${CUDA_VISIBLE_DEVICES:-none}, NUMA_NODE=${numa_node}\"
+if [[ '${USE_NUMA}' == '1' ]]; then
+    numactl --cpubind=${numa_node} --membind=${numa_node} /usr/bin/presto_server --etc-dir=/opt/presto-server/etc
+else
+    /usr/bin/presto_server --etc-dir=/opt/presto-server/etc
+fi" > ${LOGS}/worker_${worker_id}.log 2>&1 &
 }
 
-function copy_hive_metastore {
-    cp -r /mnt/data/tpch-rs/HIVE-METASTORE-MG-260313 ${VT_ROOT}/.hive_metastore
+# ----------------------------------------------------------------------------
+# Shared Hive metastore: publish/populate
+# ----------------------------------------------------------------------------
+# When HIVE_METASTORE_VERSION is set, analyze runs publish their post-ANALYZE
+# tpchsf<SF> tree under $HIVE_METASTORE_SHARED_ROOT/<version>/tpchsf<SF>/, and
+# subsequent benchmark runs populate from that snapshot instead of re-analyzing.
+# Paths inside a .prestoSchema file are container-relative
+# (file:/var/lib/presto/data/hive/data/user_data/scale-<SF>/<table>), so a
+# single snapshot works for any user whose DATA bind-mount lands on the same
+# in-container path.
+
+# Print the shared-snapshot directory for the current (version, SF) pair.
+# Prints nothing (and still succeeds) unless version, root and SF are all set.
+function shared_metastore_slot {
+    if [[ -n "${HIVE_METASTORE_VERSION:-}" && -n "${HIVE_METASTORE_SHARED_ROOT:-}" && -n "${SCALE_FACTOR:-}" ]]; then
+        echo "${HIVE_METASTORE_SHARED_ROOT}/${HIVE_METASTORE_VERSION}/tpchsf${SCALE_FACTOR}"
+    fi
+}
+
+# Mirror the shared snapshot for the current (version, SF) into
+# ${VT_ROOT}/.hive_metastore/tpchsf<SF> (rsync --delete, so local extras go).
+# A missing or disabled shared slot is reported and treated as a no-op; the
+# caller must then obtain the tpchsf<SF> tree some other way (e.g. local analyze).
+function populate_hive_metastore_from_shared {
+    local snapshot_dir target_dir
+    snapshot_dir=$(shared_metastore_slot)
+    if [[ -n "${snapshot_dir}" && -d "${snapshot_dir}" ]]; then
+        target_dir="${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR}"
+        echo "Populating ${target_dir} from shared snapshot ${snapshot_dir}"
+        mkdir -p "${VT_ROOT}/.hive_metastore"
+        rsync -a --delete "${snapshot_dir}/" "${target_dir}/"
+    else
+        echo "No shared metastore snapshot at ${snapshot_dir:-<sharing disabled>}; skipping populate."
+    fi
+}
+
+# Publish the just-analyzed .hive_metastore/tpchsf<SF> tree to the shared slot
+# for the current (version, SF) unless that slot already exists.  The copy goes
+# to a staging directory under the same parent and is then renamed into place;
+# a failed copy is discarded (returns 1) instead of being published half-written.
+function publish_hive_metastore_to_shared {
+    local slot
+    slot=$(shared_metastore_slot)
+    if [[ -z "${slot}" ]]; then
+        echo "HIVE_METASTORE_VERSION not set; skipping publish."
+        return 0
+    fi
+    local src="${VT_ROOT}/.hive_metastore/tpchsf${SCALE_FACTOR}"
+    if [[ ! -d "${src}" ]]; then
+        echo "Nothing to publish: ${src} does not exist."
+        return 0
+    fi
+    if [[ -d "${slot}" ]]; then
+        echo "Shared slot already populated at ${slot}; skipping publish."
+        return 0
+    fi
+    local parent staging
+    parent="$(dirname "${slot}")"
+    staging="${parent}/.staging-${SLURM_JOB_ID:-$$}-tpchsf${SCALE_FACTOR}"
+    mkdir -p "${parent}"
+    rm -rf "${staging}"
+    echo "Publishing ${src} -> ${slot} (via ${staging})"
+    rsync -a "${src}/" "${staging}/" || { echo "rsync to ${staging} failed; not publishing."; rm -rf "${staging}"; return 1; }
+    # Racy between the existence check above and the rename; mv -T rejects
+    # overwriting a non-empty dir, which is the protection we want.  If another
+    # publisher wins, drop our staging copy.
+    if ! mv -T "${staging}" "${slot}" 2>/dev/null; then
+        echo "Another publisher populated ${slot} first; discarding staging copy."
+        rm -rf "${staging}"
+    fi
+}
 
 # Run a cli node that will connect to the coordinator and run queries from queries.sql
-# Results are stored in cli_<SERVER_START_TIMESTAMP>.log.
+# Results are stored in cli.log.
 function run_queries {
     echo "running queries"
     [ $# -ne 2 ] && echo_error "$0 expected two arguments for '<iterations>' and '<scale_factor>'"
     local num_iterations=$1
     local scale_factor=$2
-    # We currently skip dropping cache because it requires docker (not available on the cluster).
-    run_coord_image "export PORT=$PORT; \
+    source "${SCRIPT_DIR}/defaults.env"
+
+    # The upstream coordinator image ships without jq, which
+    # run_benchmark.sh's wait_for_worker_node_registration requires.
+    # yum/dnf cannot install it at runtime because the container root is
+    # a read-only squashfs (/var/cache/dnf is read-only).  Stage a
+    # statically-linked jq under VT_ROOT (which is bind-mounted into the
+    # container as /workspace) and prepend that to PATH.  The download
+    # is cached across runs so the cost is paid once.
+    local jq_cache="${VT_ROOT}/.cache/bin"
+    local jq_arch
+    case "$(uname -m)" in
+        aarch64|arm64) jq_arch="arm64" ;;
+        x86_64|amd64)  jq_arch="amd64" ;;
+        *) echo_error "unsupported arch for jq download: $(uname -m)" ;;
+    esac
+    if [ ! -x "${jq_cache}/jq" ]; then
+        echo "Staging static jq (${jq_arch}) at ${jq_cache}/jq"
+        mkdir -p "${jq_cache}"
+        curl -sSL "https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-${jq_arch}" \
+            -o "${jq_cache}/jq"
+        chmod +x "${jq_cache}/jq"
+    fi
+
+    # Result validation is intentionally not wired here yet: that belongs
+    # to PR #275 (upstream validate_results.py) and will be hooked up
+    # after the PR merges and this branch is rebased.
+    # Cache-drop is skipped because it requires docker (not available on
+    # the cluster).
+    run_coord_image "export PATH=/workspace/.cache/bin:\$PATH; \
+    export PORT=$PORT; \
     export HOSTNAME=$COORD; \
     export PRESTO_DATA_DIR=/var/lib/presto/data/hive/data/user_data; \
-    yum install python3.12 jq -y > /dev/null; \
+    export MINIFORGE_HOME=/workspace/miniforge3; \
+    export HOME=/workspace; \
     cd /workspace/presto/scripts; \
     ./run_benchmark.sh -b tpch -s tpchsf${scale_factor} -i ${num_iterations} \
         --hostname ${COORD} --port $PORT -o /workspace/presto/slurm/presto-nvl72/result_dir --skip-drop-cache" "cli"
@@ -221,7 +372,7 @@ function run_queries {
 # Check if the coordinator is running via curl.  Fail after 10 retries.
 function wait_until_coordinator_is_running {
     echo "waiting for coordinator to be accessible"
-    validate_environment_preconditions COORD LOGS_DIR
+    validate_environment_preconditions COORD LOGS
     local state="INACTIVE"
     for i in {1..10}; do
         state=$(curl -s http://${COORD}:${PORT}/v1/info/state || true)
@@ -236,7 +387,7 @@ function wait_until_coordinator_is_running {
 
 # Check N nodes are registered with the coordinator.  Fail after 60 retries (5 minutes).
 function wait_for_workers_to_register {
-    validate_environment_preconditions LOGS_DIR COORD
+    validate_environment_preconditions LOGS COORD
     [ $# -ne 1 ] && echo_error "$0 expected one argument for 'expected number of workers'"
     echo "waiting for $1 workers to register"
     local expected_num_workers=$1
@@ -268,3 +419,86 @@ function validate_config_directory {
     validate_file_exists "${CONFIGS}/etc_worker/node.properties"
     echo "configs are valid"
 }
+
+function collect_results {
+    # Gather the coordinator/worker-0 configs and every ${LOGS}/*.log under result_dir.
+    local out_root="${SCRIPT_DIR}/result_dir"
+    local cfg_out="${out_root}/configs"
+    echo "Copying configs to ${out_root}/configs/..."
+    mkdir -p "${cfg_out}"
+    cp "${CONFIGS}/etc_coordinator/config_native.properties" "${cfg_out}/coordinator.config"
+    cp "${CONFIGS}/etc_worker_0/config_native.properties"    "${cfg_out}/worker.config"
+    echo "Copying logs to ${out_root}/..."
+    cp "${LOGS}"/*.log "${out_root}/"
+}
+
+function inject_benchmark_metadata {
+    # Best-effort: merge run metadata into .context of result_dir/benchmark_result.json.
+    local result_file="${SCRIPT_DIR}/result_dir/benchmark_result.json"
+    if [ ! -f "${result_file}" ]; then
+        echo "Warning: ${result_file} not found, skipping metadata injection"
+        return
+    fi
+
+    local kind="multi-node"
+    (( NUM_WORKERS == 1 )) && kind="single-node"
+    # The trailing "Z" designates UTC, so read the clock in UTC (-u), not local time.
+    local timestamp
+    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+    local num_drivers
+    num_drivers=$(grep "^task\.max-drivers-per-task=" "${CONFIGS}/etc_worker/config_native.properties" 2>/dev/null \
+                  | cut -d= -f2) || true
+    num_drivers="${num_drivers:-2}"
+
+    local cudf_enabled
+    cudf_enabled=$(grep "^cudf\.enabled=" "${CONFIGS}/etc_worker/config_native.properties" 2>/dev/null \
+                   | cut -d= -f2) || true
+    local engine gpu_count gpu_name
+    if [[ "${cudf_enabled}" == "true" ]]; then
+        engine="presto-velox-gpu"
+        gpu_count="${NUM_WORKERS}"
+        gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader -i 0 2>/dev/null | head -1) || true
+        gpu_name="${gpu_name:-unknown}"
+    else
+        engine="presto-velox-cpu"
+        gpu_count=0
+        gpu_name="N/A"
+    fi
+
+    local worker_image_path="${IMAGE_DIR}/${WORKER_IMAGE}.sqsh"
+    local image_digest
+    echo "Computing SHA256 of ${worker_image_path}..."
+    image_digest=$(sha256sum "${worker_image_path}" | awk '{print $1}') || true
+    image_digest="${image_digest:-unknown}"
+    echo "Image digest: ${image_digest}"
+    # jq writes to a temp file; on failure it is removed and the result file is left as-is.
+    local tmp_file
+    tmp_file=$(mktemp)
+    jq --arg kind "$kind" \
+       --arg timestamp "$timestamp" \
+       --argjson n_workers "$NUM_WORKERS" \
+       --argjson node_count "$NUM_NODES" \
+       --argjson scale_factor "$SCALE_FACTOR" \
+       --argjson gpu_count "$gpu_count" \
+       --arg gpu_name "$gpu_name" \
+       --argjson num_drivers "$num_drivers" \
+       --arg worker_image "$WORKER_IMAGE" \
+       --arg image_digest "$image_digest" \
+       --arg engine "$engine" \
+       '.context += {
+           kind: $kind,
+           timestamp: $timestamp,
+           n_workers: $n_workers,
+           node_count: $node_count,
+           scale_factor: $scale_factor,
+           gpu_count: $gpu_count,
+           gpu_name: $gpu_name,
+           num_drivers: $num_drivers,
+           worker_image: $worker_image,
+           image_digest: $image_digest,
+           engine: $engine
+       }' "${result_file}" > "${tmp_file}" && mv "${tmp_file}" "${result_file}" \
+        || { rm -f "${tmp_file}"; echo "Warning: failed to inject metadata into ${result_file}"; return 0; }
+    echo "Injected benchmark metadata into ${result_file}"
+}
diff --git a/presto/slurm/presto-nvl72/gen-tpch-data.slurm b/presto/slurm/presto-nvl72/gen-tpch-data.slurm
new file mode 100644
index 00000000..6d1ed9c8
--- /dev/null
+++ b/presto/slurm/presto-nvl72/gen-tpch-data.slurm
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#SBATCH --job-name=gen-tpch-data
+#SBATCH --time=04:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=144
+#SBATCH --mem=0
+#SBATCH --exclusive
+
+# ==============================================================================
+# TPC-H Data Generation Job Script
+# ==============================================================================
+# Generates TPC-H parquet data using benchmark_data_tools/generate_data_files.py
+# from velox-testing, running inside the tpchgen-cli container image.
+#
+# Env vars (all optional; passed via --export or defaulted below):
+#   SCALE_FACTOR  - TPC-H scale factor (default: 100)
+#   OUTPUT_DIR    - Absolute path to the output directory on the host
+#   PARALLELISM   - Number of parallel generation threads (default: 100)
+#
+# Pre-requisite: pull the image with:
+#   ./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:tpchgen-cli
+
+# ==============================================================================
+# Defaults (sbatch runs a spooled copy of this script, so BASH_SOURCE is a last resort)
+# ==============================================================================
+: "${SCALE_FACTOR:=100}"
+SCRIPT_DIR="${SCRIPT_DIR:-${SLURM_SUBMIT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}}"
+VT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+source "${SCRIPT_DIR}/defaults.env"
+: "${OUTPUT_DIR:=/scratch/${USER}/${VT_WORKSPACE}/tpch-rs-float/scale-100-no-delta}"
+: "${PARALLELISM:=100}"
+: "${IMAGE:=${IMAGE_DIR}/velox-testing-images-tpchgen-cli.sqsh}"
+
+# ==============================================================================
+# Pre-flight Info
+# ==============================================================================
+echo "========================================"
+echo "Job ID:       $SLURM_JOB_ID"
+echo "Node:         $SLURM_JOB_NODELIST"
+echo "Scale factor: $SCALE_FACTOR"
+echo "Output dir:   $OUTPUT_DIR"
+echo "Parallelism:  $PARALLELISM"
+echo "Image:        $IMAGE"
+echo "========================================"
+
+if [[ ! -f "${IMAGE}" ]]; then
+    echo "Error: tpchgen-cli image not found at: ${IMAGE}"
+    echo "Pull it first with:"
+    echo "  ${SCRIPT_DIR}/pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:tpchgen-cli"
+    exit 1
+fi
+
+mkdir -p "${OUTPUT_DIR}"
+
+# ==============================================================================
+# Generate Data
+# ==============================================================================
+# Mount OUTPUT_DIR as /data and VT_ROOT as /workspace inside the container.
+# generate_data_files.py calls tpchgen-cli (present in the image) to produce
+# per-table partitioned parquet files, then rearranges them and writes metadata.json.
+srun --ntasks=1 \
+  --container-image="${IMAGE}" \
+  --container-mounts="${OUTPUT_DIR}:/data,${VT_ROOT}:/workspace" \
+  -- python3 /workspace/benchmark_data_tools/generate_data_files.py \
+    -b tpch \
+    -d /data \
+    -s "${SCALE_FACTOR}" \
+    --convert-decimals-to-floats \
+    -j "${PARALLELISM}" \
+    --verbose || { echo "Error: data generation failed (srun exit $?)" >&2; exit 1; }
+
+echo "========================================"
+echo "Data generation complete!"
+echo "Output: ${OUTPUT_DIR}"
+echo "========================================"
diff --git a/presto/slurm/presto-nvl72/launch-analyze-tables.sh b/presto/slurm/presto-nvl72/launch-analyze-tables.sh
new file mode 100755
index 00000000..da5ac347
--- /dev/null
+++ b/presto/slurm/presto-nvl72/launch-analyze-tables.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# ==============================================================================
+# Presto Analyze Tables Launcher
+# ==============================================================================
+# Submits run-analyze-tables.slurm to Slurm.
+#
+# Usage:
+#   ./launch-analyze-tables.sh -s|--scale-factor <sf> [-n|--nodes <count>]
+#                              [-d|--data-dir <path>] [additional sbatch options]
+#
+# Examples:
+#   # SF100, single node, default data path
+#   ./launch-analyze-tables.sh -s 100
+#
+#   # SF3000, 2 nodes, custom data directory
+#   ./launch-analyze-tables.sh -s 3000 -n 2 -d /scratch/$USER/my-workspace/tpch-rs-float-no-delta
+#
+#   # Override wall-clock limit for very large scale factors
+#   ./launch-analyze-tables.sh -s 3000 -n 4 -d /scratch/$USER/my-workspace/tpch-rs-float-no-delta --time=12:00:00
+# ==============================================================================
+
+set -e
+
+cd "$(dirname "$0")"
+
+# Defaults
+NODES_COUNT="1"
+SCALE_FACTOR=""
+NUM_GPUS_PER_NODE="4"
+WORKER_IMAGE="presto-native-worker-gpu"
+COORD_IMAGE="presto-coordinator"
+DATA_DIR=""   # empty => slurm script uses its built-in default
+EXTRA_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -s|--scale-factor)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            SCALE_FACTOR="$2"; shift 2 ;;
+        -n|--nodes)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            NODES_COUNT="$2"; shift 2 ;;
+        -d|--data-dir)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            DATA_DIR="$2"; shift 2 ;;
+        -g|--num-gpus-per-node)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            NUM_GPUS_PER_NODE="$2"; shift 2 ;;
+        -w|--worker-image)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            WORKER_IMAGE="$2"; shift 2 ;;
+        -c|--coord-image)
+            [[ -n "${2:-}" && "${2:0:1}" != "-" ]] || { echo "Error: $1 requires a value"; exit 1; }
+            COORD_IMAGE="$2"; shift 2 ;;
+        --) shift; EXTRA_ARGS+=("$@"); break ;;
+        *) EXTRA_ARGS+=("$1"); shift ;;
+    esac
+done
+
+if [[ -z "${SCALE_FACTOR}" ]]; then
+    echo "Error: -s|--scale-factor is required"
+    echo "Usage: $0 -s <sf> [-n <nodes>] [-d <data_dir>] [sbatch options...]"
+    exit 1
+fi
+
+# Clean up stale logs/output files from previous runs
+rm -f logs/* *.out *.err 2>/dev/null || true
+mkdir -p logs
+
+SCRIPT_DIR="$PWD"
+
+EXPORT_VARS="ALL,SCALE_FACTOR=${SCALE_FACTOR},SCRIPT_DIR=${SCRIPT_DIR},NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE},WORKER_IMAGE=${WORKER_IMAGE},COORD_IMAGE=${COORD_IMAGE}"
+if [[ -n "${DATA_DIR}" ]]; then
+    EXPORT_VARS="${EXPORT_VARS},DATA=${DATA_DIR}"
+fi
+# Forward shared-metastore config from the calling shell so the slurm job
+# can auto-publish.  Only non-empty values are added to the export list.
+if [[ -n "${HIVE_METASTORE_VERSION:-}" ]]; then
+    EXPORT_VARS="${EXPORT_VARS},HIVE_METASTORE_VERSION=${HIVE_METASTORE_VERSION}"
+fi
+if [[ -n "${HIVE_METASTORE_SHARED_ROOT:-}" ]]; then
+    EXPORT_VARS="${EXPORT_VARS},HIVE_METASTORE_SHARED_ROOT=${HIVE_METASTORE_SHARED_ROOT}"
+fi
+
+OUT_FMT="presto-analyze_n${NODES_COUNT}_sf${SCALE_FACTOR}_%j.out"
+ERR_FMT="presto-analyze_n${NODES_COUNT}_sf${SCALE_FACTOR}_%j.err"
+
+echo "Submitting Presto Analyze Tables job..."
+echo "  Scale factor : SF${SCALE_FACTOR}"
+echo "  Nodes        : ${NODES_COUNT}"
+echo "  GPUs/node    : ${NUM_GPUS_PER_NODE}  (total workers: $((NODES_COUNT * NUM_GPUS_PER_NODE)))"
+echo "  Worker image : ${WORKER_IMAGE}"
+echo "  Data dir     : ${DATA_DIR:-<default>}"
+echo ""
+
+JOB_ID=$(sbatch \
+    --nodes="${NODES_COUNT}" \
+    --gres="gpu:${NUM_GPUS_PER_NODE}" \
+    --export="${EXPORT_VARS}" \
+    --output="${OUT_FMT}" \
+    --error="${ERR_FMT}" \
+    "${EXTRA_ARGS[@]}" \
+    run-analyze-tables.slurm | awk '{print $NF}')
+[[ "${JOB_ID}" =~ ^[0-9]+$ ]] || { echo "Error: sbatch did not return a job ID (got '${JOB_ID}')" >&2; exit 1; }
+OUT_FILE="${OUT_FMT//%j/${JOB_ID}}"
+ERR_FILE="${ERR_FMT//%j/${JOB_ID}}"
+
+echo "Job submitted with ID: $JOB_ID"
+echo ""
+echo "Monitor with:"
+echo "  squeue -j $JOB_ID"
+echo "  tail -f ${OUT_FILE}"
+echo "  tail -f ${ERR_FILE}"
+echo "  tail -f logs/coord.log"
+echo "  tail -f logs/worker_*.log"
+echo "  tail -f logs/cli.log"
+echo ""
+echo "Waiting for job to complete..."
+
+while squeue -j "$JOB_ID" 2>/dev/null | grep -q "$JOB_ID"; do
+    sleep 10
+done
+
+echo ""
+echo "Job completed!"
+echo ""
+echo "Hive metastore updated at: $(cd ../../.. && pwd -P)/.hive_metastore"
+echo ""
+echo "Showing job output:"
+echo "========================================"
+cat "${OUT_FILE}" 2>/dev/null || echo "No output available"
+echo ""
+echo "Showing CLI log:"
+cat logs/cli.log 2>/dev/null || echo "No CLI output available"
diff --git a/presto/slurm/presto-nvl72/launch-gen-data.sh b/presto/slurm/presto-nvl72/launch-gen-data.sh
new file mode 100755
index 00000000..57898c6d
--- /dev/null
+++ b/presto/slurm/presto-nvl72/launch-gen-data.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# ==============================================================================
+# TPC-H Data Generation Launcher
+# ==============================================================================
+# Submits a SLURM job to generate TPC-H parquet data using tpchgen-rs.
+#
+# Usage:
+#   ./launch-gen-data.sh [-s|--scale-factor <sf>] [-o|--output-dir <path>] [-j|--parallelism <n>] [-N|--nodelist <nodes>] [additional sbatch options]
+#
+# To change the container image, override IMAGE_DIR or set IMAGE before running.
+# Pre-pull the image with: ./pull_ghcr_image.sh ghcr.io/rapidsai/velox-testing-images:tpchgen-cli
+# ==============================================================================
+
+set -e
+
+cd "$(dirname "$0")"
+
+module load slurm 2>/dev/null || true
+# We are already in the script directory; re-resolving a relative $0 here would fail.
+source ./defaults.env
+
+SCALE_FACTOR="100"
+OUTPUT_DIR="${OUTPUT_DIR:-/scratch/${USER}/${VT_WORKSPACE}/tpch-rs-float/scale-100-no-delta}"
+PARALLELISM="100"
+# NODELIST defaults to empty -- Slurm picks any available node.
+# Override via env or -N/--nodelist to pin.
+NODELIST="${NODELIST:-}"
+EXTRA_ARGS=()
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -s|--scale-factor)
+            SCALE_FACTOR="$2"; shift 2 ;;
+        -o|--output-dir)
+            OUTPUT_DIR="$2"; shift 2 ;;
+        -j|--parallelism)
+            PARALLELISM="$2"; shift 2 ;;
+        -N|--nodelist)
+            NODELIST="$2"; shift 2 ;;
+        --) shift; EXTRA_ARGS+=("$@"); break ;;
+        *) EXTRA_ARGS+=("$1"); shift ;;
+    esac
+done
+
+echo "Submitting TPC-H data generation job..."
+echo "  Scale factor: $SCALE_FACTOR"
+echo "  Output dir:   $OUTPUT_DIR"
+echo "  Parallelism:  $PARALLELISM"
+echo "  Node:         ${NODELIST:-<any available>}"
+echo ""
+
+OUT_FMT="gen-tpch-data_sf${SCALE_FACTOR}_%j.out"
+ERR_FMT="gen-tpch-data_sf${SCALE_FACTOR}_%j.err"
+
+NODELIST_ARG=()
+if [[ -n "${NODELIST}" ]]; then
+    NODELIST_ARG=(--nodelist="${NODELIST}")
+fi
+
+JOB_ID=$(sbatch \
+  --nodes=1 \
+  "${NODELIST_ARG[@]}" \
+  --export="ALL,SCALE_FACTOR=${SCALE_FACTOR},OUTPUT_DIR=${OUTPUT_DIR},PARALLELISM=${PARALLELISM}" \
+  --output="${OUT_FMT}" \
+  --error="${ERR_FMT}" \
+  "${EXTRA_ARGS[@]}" \
+  gen-tpch-data.slurm | awk '{print $NF}')
+[[ "${JOB_ID}" =~ ^[0-9]+$ ]] || { echo "Error: sbatch did not return a job ID (got '${JOB_ID}')" >&2; exit 1; }
+OUT_FILE="${OUT_FMT//%j/${JOB_ID}}"
+ERR_FILE="${ERR_FMT//%j/${JOB_ID}}"
+
+echo "Job submitted with ID: $JOB_ID"
+echo ""
+echo "Monitor job with:"
+echo "  squeue -j $JOB_ID"
+echo "  tail -f ${OUT_FILE}"
+echo "  tail -f ${ERR_FILE}"
+echo ""
+echo "Waiting for job to complete..."
+
+while squeue -j "$JOB_ID" 2>/dev/null | grep -q "$JOB_ID"; do
+    sleep 5
+done
+
+echo ""
+echo "Job completed!"
+echo ""
+echo "Output files:"
+ls -lh "${OUT_FILE}" "${ERR_FILE}" 2>/dev/null || echo "No output files found"
+echo ""
+echo "Showing job output:"
+echo "========================================"
+cat "${OUT_FILE}" 2>/dev/null || echo "No output available"
diff --git a/presto/slurm/presto-nvl72/launch-run.sh b/presto/slurm/presto-nvl72/launch-run.sh
index 83e9e060..52c0df34 100755
--- a/presto/slurm/presto-nvl72/launch-run.sh
+++ b/presto/slurm/presto-nvl72/launch-run.sh
@@ -19,8 +19,12 @@ set -e
 # Change to script directory
 cd "$(dirname "$0")"
 
-# Clean up old output files
-rm -f result_dir/* logs/* *.out *.err 2>/dev/null || true
+source ./defaults.env
+
+# Clean up old output files — use rm -rf so subdirectories (e.g. query_results/)
+# are fully removed and stale benchmark_result.json cannot survive a cancelled run.
+rm -rf result_dir logs 2>/dev/null || true
+rm -f *.out *.err 2>/dev/null || true
 mkdir -p result_dir logs
 
 echo "Submitting Presto TPC-H benchmark job..."
@@ -33,8 +37,11 @@ SCALE_FACTOR=""
 NUM_ITERATIONS="2"
 EXTRA_ARGS=()
 NUM_GPUS_PER_NODE="4"
-WORKER_IMAGE="presto-native-worker-gpu"
-COORD_IMAGE="presto-coordinator"
+USE_NUMA="1"
+VARIANT_TYPE="gpu"
+WORKER_IMAGE=""
+COORD_IMAGE=""
+OUTPUT_PATH=""
 while [[ $# -gt 0 ]]; do
     case "$1" in
         -n|--nodes)
@@ -97,6 +104,25 @@ while [[ $# -gt 0 ]]; do
                 exit 1
             fi
             ;;
+        --no-numa)
+            USE_NUMA="0"
+            shift
+            ;;
+        --cpu)
+            VARIANT_TYPE="cpu"
+            NUM_GPUS_PER_NODE="1"
+            USE_NUMA="0"
+            shift
+            ;;
+        -o|--output-path)
+            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
+                OUTPUT_PATH="$2"
+                shift 2
+            else
+                echo "Error: -o|--output-path requires a value"
+                exit 1
+            fi
+            ;;
         --)
             shift
             break
@@ -118,13 +144,40 @@ if [[ -z "${SCALE_FACTOR}" ]]; then
     echo "Usage: $0 -n|--nodes <count> -s|--scale-factor <sf> [-i|--iterations <n>] [additional sbatch options]"
     exit 1
 fi
+if [[ -z "${WORKER_IMAGE}" ]]; then
+    echo "Error: -w|--worker-image is required"
+    exit 1
+fi
+if [[ -z "${COORD_IMAGE}" ]]; then
+    echo "Error: -c|--coord-image is required"
+    exit 1
+fi
 
 # Submit job (include nodes/SF/iterations in file names)
 OUT_FMT="presto-tpch-run_n${NODES_COUNT}_sf${SCALE_FACTOR}_i${NUM_ITERATIONS}_%j.out"
 ERR_FMT="presto-tpch-run_n${NODES_COUNT}_sf${SCALE_FACTOR}_i${NUM_ITERATIONS}_%j.err"
 SCRIPT_DIR="$PWD"
-JOB_ID=$(sbatch --nodes="${NODES_COUNT}" --export="ALL,SCALE_FACTOR=${SCALE_FACTOR},NUM_ITERATIONS=${NUM_ITERATIONS},SCRIPT_DIR=${SCRIPT_DIR},NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE},WORKER_IMAGE=${WORKER_IMAGE},COORD_IMAGE=${COORD_IMAGE}" \
---output="${OUT_FMT}" --error="${ERR_FMT}" "${EXTRA_ARGS[@]}" --gres="gpu:${NUM_GPUS_PER_NODE}" \
+JOB_NAME="presto-tpch-run_n${NODES_COUNT}_sf${SCALE_FACTOR}"
+# NODELIST is unset by default -- Slurm picks any available nodes.
+# Export NODELIST=<host-or-range> before invoking to pin.
+NODELIST="${NODELIST:-}"
+NODELIST_ARG=()
+if [[ -n "${NODELIST}" ]]; then
+    NODELIST_ARG=(--nodelist="${NODELIST}")
+fi
+GRES_OPT=$([[ "$VARIANT_TYPE" == "gpu" ]] && echo "--gres=gpu:${NUM_GPUS_PER_NODE}" || echo "")
+EXPORT_VARS="ALL,SCALE_FACTOR=${SCALE_FACTOR},NUM_ITERATIONS=${NUM_ITERATIONS},SCRIPT_DIR=${SCRIPT_DIR},NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE},WORKER_IMAGE=${WORKER_IMAGE},COORD_IMAGE=${COORD_IMAGE},USE_NUMA=${USE_NUMA},VARIANT_TYPE=${VARIANT_TYPE}"
+# Forward shared-metastore config from the calling shell so the slurm job
+# can populate from the shared snapshot when opted in.
+if [[ -n "${HIVE_METASTORE_VERSION:-}" ]]; then
+    EXPORT_VARS="${EXPORT_VARS},HIVE_METASTORE_VERSION=${HIVE_METASTORE_VERSION}"
+fi
+if [[ -n "${HIVE_METASTORE_SHARED_ROOT:-}" ]]; then
+    EXPORT_VARS="${EXPORT_VARS},HIVE_METASTORE_SHARED_ROOT=${HIVE_METASTORE_SHARED_ROOT}"
+fi
+JOB_ID=$(sbatch --job-name="${JOB_NAME}" --nodes="${NODES_COUNT}" "${NODELIST_ARG[@]}" \
+--export="${EXPORT_VARS}" \
+--output="${OUT_FMT}" --error="${ERR_FMT}" "${EXTRA_ARGS[@]}" ${GRES_OPT} \
 run-presto-benchmarks.slurm | awk '{print $NF}')
 OUT_FILE="${OUT_FMT//%j/${JOB_ID}}"
 ERR_FILE="${ERR_FMT//%j/${JOB_ID}}"
@@ -155,9 +208,9 @@ echo "Monitor job with:"
 echo "  squeue -j $JOB_ID"
 echo "  tail -f ${OUT_FILE}"
 echo "  tail -f ${ERR_FILE}"
-echo "  tail -f logs/coord_*.log"
+echo "  tail -f logs/coord.log"
 echo "  tail -f logs/worker_*.log"
-echo "  tail -f logs/cli_*.log"
+echo "  tail -f logs/cli.log"
 echo ""
 echo "Waiting for job to complete..."
 
@@ -176,6 +229,12 @@ echo "Showing job output:"
 echo "========================================"
 cat "${OUT_FILE}" 2>/dev/null || echo "No output available"
 echo "Showing benchmark results:"
-cli_log="$(ls -t logs/cli_*.log 2>/dev/null | head -1)"
-[ -z "${cli_log}" ] && cli_log="logs/cli.log"
-cat "${cli_log}" 2>/dev/null || echo "No CLI output available"
+cat logs/cli.log 2>/dev/null || echo "No CLI output available"
+
+if [[ -n "${OUTPUT_PATH}" ]]; then
+    echo ""
+    echo "Copying results to ${OUTPUT_PATH}..."
+    mkdir -p "${OUTPUT_PATH}"
+    cp -r result_dir/. "${OUTPUT_PATH}/"
+    echo "Results copied to ${OUTPUT_PATH}"
+fi
diff --git a/presto/slurm/presto-nvl72/pull_ghcr_image.sh b/presto/slurm/presto-nvl72/pull_ghcr_image.sh
new file mode 100755
index 00000000..302a3648
--- /dev/null
+++ b/presto/slurm/presto-nvl72/pull_ghcr_image.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Pull a Docker image from GitHub Container Registry (ghcr.io) and save it
+# as a .sqsh file using pyxis/enroot on a compute node.
+#
+# Requires enroot credentials to be set to access ghcr.io
+#
+# Usage:
+#   ./pull_ghcr_image.sh <ghcr.io/org/image:tag> [--output <path/to/image.sqsh>] [--overwrite]
+#
+# Examples:
+#   ./pull_ghcr_image.sh ghcr.io/myorg/presto-worker:latest
+#   ./pull_ghcr_image.sh ghcr.io/myorg/presto-worker:v1.2.3 --output /tmp/worker.sqsh
+#   ./pull_ghcr_image.sh ghcr.io/myorg/presto-worker:v1.2.3 --overwrite
+
+set -e
+
+source "$(dirname "${BASH_SOURCE[0]}")/defaults.env"
+
+usage() {
+    # Print the command-line help text and abort with a non-zero status.
+    printf '%s\n' \
+        "Usage: $0 <ghcr.io/org/image:tag> [--output <path/to/image.sqsh>] [--overwrite]" "" \
+        "Options:" \
+        "  --output, -o   Write the image to this exact path (overrides IMAGE_DIR)." \
+        "  --overwrite    Re-pull even when the target .sqsh already exists." "" \
+        "Environment variables:" \
+        "  IMAGE_DIR   Output directory when --output is not specified (default: \$IMAGE_DIR from defaults.env)"
+    exit 1
+}
+
+# Parse arguments
+IMAGE_REF=""
+OUTPUT_PATH=""
+OVERWRITE=0
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --output|-o)
+            [[ -n "${2:-}" ]] || { echo "Error: --output requires a value"; usage; }
+            OUTPUT_PATH="$2"
+            shift 2
+            ;;
+        --overwrite)
+            OVERWRITE=1
+            shift
+            ;;
+        -*)
+            echo "Unknown option: $1"
+            usage
+            ;;
+        *)
+            [[ -z "$IMAGE_REF" ]] || { echo "Error: unexpected argument '$1'"; usage; }
+            IMAGE_REF="$1"
+            shift
+            ;;
+    esac
+done
+
+[[ -n "$IMAGE_REF" ]] || { echo "Error: image reference is required"; usage; }
+
+# Validate it looks like a ghcr.io reference
+if [[ "$IMAGE_REF" != ghcr.io/* ]]; then
+    echo "Error: image reference must start with ghcr.io/ (got: $IMAGE_REF)"
+    exit 1
+fi
+
+# Convert ghcr.io/org/image:tag  ->  docker://ghcr.io#org/image:tag  (enroot import URI)
+ENROOT_URI="docker://${IMAGE_REF/ghcr.io\//ghcr.io#}"
+
+# Derive default output path from image name and tag
+if [[ -z "$OUTPUT_PATH" ]]; then
+    [[ -n "${IMAGE_DIR:-}" ]] || { echo "Error: IMAGE_DIR is not set (check defaults.env)"; exit 1; }
+    # Extract image name and tag: ghcr.io/org/image:tag -> image-tag
+    IMAGE_SLUG="${IMAGE_REF#ghcr.io/}"    # org/image:tag
+    IMAGE_SLUG="${IMAGE_SLUG##*/}"        # image:tag
+    IMAGE_SLUG="${IMAGE_SLUG//:/-}"       # image-tag
+    OUTPUT_PATH="$IMAGE_DIR/${IMAGE_SLUG}.sqsh"
+fi
+
+echo "Image:      $IMAGE_REF"
+echo "Output:     $OUTPUT_PATH"
+echo "Overwrite:  $([[ $OVERWRITE -eq 1 ]] && echo yes || echo no)"
+echo ""
+
+# Run enroot import directly as the job so it inherits ENROOT_GZIP_PROGRAM.
+# Pyxis (--container-image) runs enroot inside slurmstepd and ignores --export,
+# so we bypass pyxis entirely here.
+#
+# The existence check, mkdir, and enroot import all run on the compute node
+# because the default IMAGE_DIR (/scratch/$USER/images/presto) is not mounted
+# on the head node; checking or creating it from here would be inconsistent
+# with what the compute node sees.
+ENROOT_DECOMPRESS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/enroot-decompress.sh"
+export OUTPUT_PATH ENROOT_URI OVERWRITE
+
+srun --export="ALL,PMIX_MCA_gds=^ds12,ENROOT_GZIP_PROGRAM=${ENROOT_DECOMPRESS}" \
+    --nodes=1 --mem=0 --ntasks-per-node=1 \
+    --mpi=pmix_v4 \
+    bash -c '
+set -e
+if [[ -f "$OUTPUT_PATH" ]]; then
+    size=$(ls -lh "$OUTPUT_PATH" | awk "{print \$5}")
+    if [[ "$OVERWRITE" == "1" ]]; then
+        echo "Image already exists: $OUTPUT_PATH ($size)"
+        echo "--overwrite was passed; removing and re-pulling."
+        rm -f "$OUTPUT_PATH"
+    else
+        echo "Image already exists: $OUTPUT_PATH ($size)"
+        echo "Skipping pull.  Pass --overwrite to re-pull, or --output <path> to write elsewhere."
+        exit 0
+    fi
+fi
+mkdir -p "$(dirname "$OUTPUT_PATH")"
+enroot import --output "$OUTPUT_PATH" "$ENROOT_URI"
+echo ""
+echo "Saved: $(ls -lh "$OUTPUT_PATH")"
+'
diff --git a/presto/slurm/presto-nvl72/run-analyze-tables.sh b/presto/slurm/presto-nvl72/run-analyze-tables.sh
new file mode 100755
index 00000000..1ce97361
--- /dev/null
+++ b/presto/slurm/presto-nvl72/run-analyze-tables.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+set -exuo pipefail
+
+# ==============================================================================
+# Presto Analyze Tables Execution Script
+# ==============================================================================
+# Starts coordinator + workers, registers TPC-H tables in the Hive metastore,
+# then runs ANALYZE TABLE for the given scale factor.
+#
+# All configuration is passed via environment variables set by the .slurm script.
+
+source "${SCRIPT_DIR}/echo_helpers.sh"
+source "${SCRIPT_DIR}/functions.sh"
+
+# ==============================================================================
+# Setup: generate configs and prepare directories
+# ==============================================================================
+echo "Generating Presto configs..."
+generate_configs
+
+# ANALYZE TABLE is not supported with cudf enabled. Disable it in all worker
+# configs so workers run in CPU mode while still using the GPU worker image.
+echo "Disabling cudf in worker configs for ANALYZE TABLE compatibility..."
+for worker_conf in "${CONFIGS}"/etc_worker*/config_native.properties; do
+    sed -i 's/^cudf\.enabled=true/cudf.enabled=false/' "${worker_conf}"
+done
+for worker_hive in "${CONFIGS}"/etc_worker*/catalog/hive.properties; do
+    sed -i 's/^cudf\./#cudf./' "${worker_hive}"
+done
+
+echo "Creating hive metastore directory..."
+mkdir -p "${VT_ROOT}/.hive_metastore"
+
+validate_config_directory
+
+# ==============================================================================
+# Start Coordinator
+# ==============================================================================
+echo "Starting Presto coordinator on ${COORD}..."
+run_coordinator
+wait_until_coordinator_is_running
+
+# ==============================================================================
+# Start Workers
+# ==============================================================================
+echo "Starting ${NUM_WORKERS} Presto workers across ${NUM_NODES} nodes..."
+worker_id=0
+for node in $(scontrol show hostnames "$SLURM_JOB_NODELIST"); do
+    for gpu_id in $(seq 0 $((NUM_GPUS_PER_NODE - 1))); do
+        echo "  Starting worker ${worker_id} on node ${node} GPU ${gpu_id}"
+        run_worker "${gpu_id}" "$WORKER_IMAGE" "${node}" "$worker_id"
+        worker_id=$((worker_id + 1))
+    done
+done
+
+# ==============================================================================
+# Wait for Workers to Register
+# ==============================================================================
+echo "Waiting for ${NUM_WORKERS} workers to register with coordinator..."
+wait_for_workers_to_register $NUM_WORKERS
+
+# ==============================================================================
+# Register Tables and Run ANALYZE TABLE
+# ==============================================================================
+echo "Registering TPC-H tables and running ANALYZE TABLE for tpchsf${SCALE_FACTOR}..."
+# The coordinator container only has Python 3.9, so python3.12 -m venv fails.
+# py_env_functions.sh falls back to conda when MINIFORGE_HOME is set.
+# Miniforge is installed at ${VT_ROOT}/miniforge3, which is mounted as
+# /workspace/miniforge3 inside the container.
+run_coord_image "export PRESTO_DATA_DIR=/var/lib/presto/data/hive/data/user_data; \
+    export MINIFORGE_HOME=/workspace/miniforge3; \
+    export HOME=/workspace; \
+    cd /workspace/presto/scripts; \
+    ./setup_benchmark_tables.sh \
+        -b tpch \
+        -d ${DATASET_NAME:-scale-${SCALE_FACTOR}} \
+        -s tpchsf${SCALE_FACTOR} \
+        -H ${COORD} \
+        -p ${PORT} \
+        --no-docker" "cli"
+
+echo "========================================"
+echo "Analyze tables complete!"
+echo "Hive metastore updated at: ${VT_ROOT}/.hive_metastore"
+echo "Logs available at: ${LOGS}"
+echo "========================================"
+
+# Auto-publish to the shared metastore when HIVE_METASTORE_VERSION is set and
+# the target slot is still empty.  See functions.sh:publish_hive_metastore_to_shared.
+if [[ -n "${HIVE_METASTORE_VERSION:-}" ]]; then
+    publish_hive_metastore_to_shared
+fi
diff --git a/presto/slurm/presto-nvl72/run-analyze-tables.slurm b/presto/slurm/presto-nvl72/run-analyze-tables.slurm
new file mode 100755
index 00000000..bbcda6f7
--- /dev/null
+++ b/presto/slurm/presto-nvl72/run-analyze-tables.slurm
@@ -0,0 +1,101 @@
+#!/bin/bash
+#SBATCH --job-name=presto-analyze-tables
+#SBATCH --time=08:00:00
+#SBATCH --ntasks-per-node=1
+#SBATCH --cpus-per-task=144
+#SBATCH --mem=0
+#SBATCH --exclusive
+
+# ==============================================================================
+# Presto Analyze Tables Job Script
+# ==============================================================================
+# Starts coordinator + workers, registers TPC-H tables, and runs ANALYZE TABLE
+# to collect statistics for the given scale factor.
+#
+# Required env vars (passed via --export):
+#   SCALE_FACTOR      - TPC-H scale factor (e.g. 100)
+#   SCRIPT_DIR        - Absolute path to this scripts directory
+#   NUM_GPUS_PER_NODE - Number of GPU workers per node
+#   WORKER_IMAGE      - Worker container image name (without .sqsh)
+#   COORD_IMAGE       - Coordinator container image name (without .sqsh)
+
+# ==============================================================================
+# Validate required variables
+# ==============================================================================
+if [ -z "${SCALE_FACTOR:-}" ]; then
+    echo "Error: SCALE_FACTOR is required." >&2
+    exit 1
+fi
+export SCALE_FACTOR
+
+if [ -z "${SCRIPT_DIR:-}" ]; then
+    echo "Error: SCRIPT_DIR is required." >&2
+    exit 1
+fi
+export SCRIPT_DIR
+
+if [ -z "${WORKER_IMAGE:-}" ]; then
+    echo "Error: WORKER_IMAGE is required." >&2
+    exit 1
+fi
+export WORKER_IMAGE
+
+if [ -z "${COORD_IMAGE:-}" ]; then
+    echo "Error: COORD_IMAGE is required." >&2
+    exit 1
+fi
+export COORD_IMAGE
+
+if [ -z "${NUM_GPUS_PER_NODE:-}" ]; then
+    echo "Error: NUM_GPUS_PER_NODE is required." >&2
+    exit 1
+fi
+export NUM_GPUS_PER_NODE
+
+# ==============================================================================
+# Paths - overriding defaults to use /scratch mount (only available on compute nodes)
+# ==============================================================================
+export VT_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." >/dev/null 2>&1 && pwd -P)"
+source "${SCRIPT_DIR}/defaults.env"
+export DATA
+export HIVE_METASTORE_SHARED_ROOT HIVE_METASTORE_VERSION
+# Dataset subdirectory name under DATA (defaults to scale-<SF> if not set)
+: "${DATASET_NAME:=scale-${SCALE_FACTOR}}"
+export DATASET_NAME
+export IMAGE_DIR
+export LOGS=$SCRIPT_DIR/logs
+export VARIANT_TYPE=gpu
+export USE_NUMA=${USE_NUMA:-0}
+export CONFIGS=$VT_ROOT/presto/docker/config/generated/$VARIANT_TYPE
+
+# ==============================================================================
+# Computed values
+# ==============================================================================
+export NUM_NODES=$SLURM_JOB_NUM_NODES
+export COORD=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
+export NUM_WORKERS=$((NUM_NODES * NUM_GPUS_PER_NODE))
+
+# Presto configuration
+export PORT=9200
+export CUDF_LIB=/usr/lib64/presto-native-libs
+
+# ==============================================================================
+# Pre-flight Info
+# ==============================================================================
+echo "========================================"
+echo "Job ID: $SLURM_JOB_ID"
+echo "Nodes: $SLURM_JOB_NUM_NODES"
+echo "Node list: $SLURM_JOB_NODELIST"
+echo "Coordinator node: $COORD"
+echo "Worker image: $WORKER_IMAGE"
+echo "Coord image: $COORD_IMAGE"
+echo "Scale factor: $SCALE_FACTOR"
+echo "Data directory: $DATA"
+echo "Image directory: $IMAGE_DIR"
+echo "Logs directory: $LOGS"
+echo "Total workers: $NUM_WORKERS (${NUM_NODES} nodes x ${NUM_GPUS_PER_NODE} GPUs)"
+echo "========================================"
+
+mkdir -p "${LOGS}"
+
+bash "${SCRIPT_DIR}/run-analyze-tables.sh"
diff --git a/presto/slurm/presto-nvl72/run-presto-benchmarks.sh b/presto/slurm/presto-nvl72/run-presto-benchmarks.sh
index 9d3fade5..630b84ce 100755
--- a/presto/slurm/presto-nvl72/run-presto-benchmarks.sh
+++ b/presto/slurm/presto-nvl72/run-presto-benchmarks.sh
@@ -14,6 +14,11 @@ set -exuo pipefail
 source $SCRIPT_DIR/echo_helpers.sh
 source $SCRIPT_DIR/functions.sh
 
+# Ensure metadata injection runs even if the script exits early (e.g. a worker
+# fails to register).  This guarantees benchmark_result.json always has a
+# context block with image_digest before the results are copied out.
+trap 'inject_benchmark_metadata' EXIT
+
 # ==============================================================================
 # Setup and Validation
 # ==============================================================================
@@ -47,10 +52,6 @@ done
 echo "Waiting for ${NUM_WORKERS} workers to register with coordinator..."
 wait_for_workers_to_register $NUM_WORKERS
 
-# Not currently needed because we are copying the hive metastore from the data source.
-#echo "Creating TPC-H schema and registering tables for scale factor ${SCALE_FACTOR}..."
-#setup_benchmark ${SCALE_FACTOR}
-
 # ==============================================================================
 # Run Queries
 # ==============================================================================
@@ -62,10 +63,13 @@ run_queries ${NUM_ITERATIONS} ${SCALE_FACTOR}
 # ==============================================================================
 echo "Processing results..."
 mkdir -p ${SCRIPT_DIR}/result_dir
-cp -r ${LOGS_DIR}/cli_${SERVER_START_TIMESTAMP}.log ${SCRIPT_DIR}/result_dir/summary.txt
+cp -r ${LOGS}/cli.log ${SCRIPT_DIR}/result_dir/summary.txt
+
+echo "Collecting configs and logs into result directory..."
+collect_results
 
 echo "========================================"
 echo "Benchmark complete!"
-echo "Results saved to: ${SCRIPT_DIR}/results_dir"
-echo "Logs available at: ${LOGS_DIR}"
+echo "Results saved to: ${SCRIPT_DIR}/result_dir"
+echo "Logs available at: ${LOGS}"
 echo "========================================"
diff --git a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm
index 1d1edf80..39d4dd84 100755
--- a/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm
+++ b/presto/slurm/presto-nvl72/run-presto-benchmarks.slurm
@@ -38,17 +38,12 @@ export COORD_IMAGE
 
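-# Assumes the repo root is four steps up from the script directory.  This should refer to velox-testing.
+# Assumes the repo root is three steps up from the script directory.  This should refer to velox-testing.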
 export VT_ROOT="$(cd -- "${SCRIPT_DIR}/../../.." >/dev/null 2>&1 && pwd -P)"
-export DATA=/mnt/data/tpch-rs
-export IMAGE_DIR=/mnt/data/images/presto
-export LOGS_DIR="${LOGS_DIR:-${SCRIPT_DIR}/logs}"
-[ -L "${LOGS_DIR}" ] && rm -f "${LOGS_DIR}"
-mkdir -p "${LOGS_DIR}"
-if compgen -G "${LOGS_DIR}/*.log" > /dev/null 2>&1; then
-  mkdir -p "${LOGS_DIR}/archive"
-  mv "${LOGS_DIR}"/*.log "${LOGS_DIR}/archive/"
-fi
-export SERVER_START_TIMESTAMP="$(date +"%Y%m%dT%H%M%S")"
-export VARIANT_TYPE=gpu
+source "${SCRIPT_DIR}/defaults.env"
+export DATA
+export IMAGE_DIR
+export HIVE_METASTORE_SHARED_ROOT HIVE_METASTORE_VERSION
+export LOGS=$SCRIPT_DIR/logs
+export VARIANT_TYPE=${VARIANT_TYPE:-gpu}
 export CONFIGS=$VT_ROOT/presto/docker/config/generated/$VARIANT_TYPE
 
 # Container Images
@@ -61,12 +56,20 @@ export PORT=9200
 export CUDF_LIB=/usr/lib64/presto-native-libs
 
 # UCX Configuration
+#export UCX_TLS=^ib,ud:aux,sm
+#export UCX_LOG_LEVEL=DEBUG
+#export UCX_PROTO_INFO=y
 export UCX_TLS=^ib,ud:aux,sm
-export UCX_MAX_RNDV_RAILS=1
+#export UCX_TLS=cuda_ipc,cuda_copy,sysv,tcp,self  # cuda_copy needed for cross-type (CPU<->GPU) memcpy; sysv for intra-node control; tcp for inter-node
+#export UCX_SOCKADDR_TLS_PRIORITY=tcp   # prefer TCP over rdmacm for CM
+export UCX_NET_DEVICES=bond0
+#export UCX_RNDV_THRESH=0               # force rendezvous; avoids TCP am_short/am_bcopy for GPU buffers
+#export UCX_MAX_RNDV_RAILS=1
 export UCX_RNDV_PIPELINE_ERROR_HANDLING=y
 export UCX_TCP_KEEPINTVL=1ms
 export UCX_KEEPALIVE_INTERVAL=1ms
 
+
 # ==============================================================================
 # Computed Values
 # ==============================================================================
@@ -94,10 +97,14 @@ echo "Coord image: $COORD_IMAGE"
 echo "Scale factor: $SCALE_FACTOR"
 echo "Iterations: $NUM_ITERATIONS"
 echo "Data directory: $DATA"
-echo "Logs directory: $LOGS_DIR"
-echo "Total workers: $NUM_WORKERS (${NUM_NODES} nodes × ${NUM_GPUS_PER_NODE} GPUs)"
+echo "Logs directory: $LOGS"
+echo "Variant: $VARIANT_TYPE"
+echo "Total workers: $NUM_WORKERS (${NUM_NODES} nodes × ${NUM_GPUS_PER_NODE} workers/node)"
 echo "Single node execution: $SINGLE_NODE_EXECUTION"
 echo "========================================"
 
+# Create necessary directories
+mkdir -p ${LOGS}
+
 # Launch the job script
 bash $SCRIPT_DIR/run-presto-benchmarks.sh
diff --git a/presto/slurm/presto-nvl72/run-sweep.sh b/presto/slurm/presto-nvl72/run-sweep.sh
new file mode 100755
index 00000000..284d4337
--- /dev/null
+++ b/presto/slurm/presto-nvl72/run-sweep.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# ==============================================================================
+# Presto TPC-H Benchmark Sweep
+# ==============================================================================
+# Runs launch-run.sh + post_results.py for every combination of nodes and
+# scale factors defined below.
+#
+# Usage: ./run-sweep.sh [OPTIONS]
+#
+# Required options:
+#   --sku-name                    Hardware SKU name (e.g. raplab-gb200-nvl72)
+#   --storage-configuration-name  Storage configuration name passed to post_results.py
+#   --velox-branch                Velox branch used to build the worker image
+#   --presto-branch               Presto branch used to build the worker image
+#   --velox-repo                  Velox repository URL
+#   --presto-repo                 Presto repository URL
+#
+# Optional:
+#   -n, --nodes          Space-separated node counts to sweep (default: "8")
+#   -s, --scale-factors  Space-separated scale factors to sweep (default: "30000")
+#   -i, --iterations     Number of benchmark iterations (default: 3)
+#   --cache-state        Override cache state (default: derived from iterations:
+#                        1 iteration -> "lukewarm", 2+ iterations -> "warm")
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+source "${SCRIPT_DIR}/defaults.env"
+
+# ------------------------------------------------------------------------------
+# Argument parsing
+# ------------------------------------------------------------------------------
+
+NODE_COUNTS=(8)
+SCALE_FACTORS=(30000)
+ITERATIONS=3
+SKU_NAME=""
+STORAGE_CONFIGURATION_NAME=""
+CACHE_STATE=""
+VELOX_BRANCH=""
+PRESTO_BRANCH=""
+VELOX_REPO=""
+PRESTO_REPO=""
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --sku-name)                    SKU_NAME="$2";                    shift 2 ;;
+        --storage-configuration-name)  STORAGE_CONFIGURATION_NAME="$2"; shift 2 ;;
+        --cache-state)                 CACHE_STATE="$2";                 shift 2 ;;
+        --velox-branch)                VELOX_BRANCH="$2";                shift 2 ;;
+        --presto-branch)    PRESTO_BRANCH="$2";  shift 2 ;;
+        --velox-repo)       VELOX_REPO="$2";     shift 2 ;;
+        --presto-repo)      PRESTO_REPO="$2";    shift 2 ;;
+        -n|--nodes)         read -ra NODE_COUNTS <<< "$2"; shift 2 ;;
+        -s|--scale-factors) read -ra SCALE_FACTORS <<< "$2"; shift 2 ;;
+        -i|--iterations)    ITERATIONS="$2";     shift 2 ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+for req in SKU_NAME STORAGE_CONFIGURATION_NAME VELOX_BRANCH PRESTO_BRANCH VELOX_REPO PRESTO_REPO; do
+    [[ -n "${!req}" ]] || { opt="${req,,}"; echo "Error: --${opt//_/-} is required"; exit 1; }
+done
+
+if [[ -z "${CACHE_STATE}" ]]; then
+    [[ "${ITERATIONS}" -eq 1 ]] && CACHE_STATE="lukewarm" || CACHE_STATE="warm"
+fi
+
+# Seconds to wait between runs to allow the previous job's cudf exchange UCX
+# sockets to release their ports (10003, 10013, ...).  These ports are
+# deterministic (http_port+3 per worker) so a new job on the same nodes will
+# collide if the previous job's containers haven't fully torn down yet.
+INTER_RUN_SLEEP=90
+
+# ------------------------------------------------------------------------------
+
+total=$(( ${#NODE_COUNTS[@]} * ${#SCALE_FACTORS[@]} ))
+run=0
+
+for SF in "${SCALE_FACTORS[@]}"; do
+    for N in "${NODE_COUNTS[@]}"; do
+        run=$(( run + 1 ))
+        OUTPUT_DIR="${RESULTS_BASE}/result_sf${SF}_n${N}"
+
+        echo "========================================"
+        echo "Run ${run}/${total}: nodes=${N} scale_factor=${SF}"
+        echo "Output: ${OUTPUT_DIR}"
+        echo "========================================"
+
+        rm -rf "${OUTPUT_DIR}"
+        "${SCRIPT_DIR}/launch-run.sh" \
+            -n "${N}" \
+            -s "${SF}" \
+            -i "${ITERATIONS}" \
+            -o "${OUTPUT_DIR}"
+
+        echo ""
+        echo "Posting results for sf=${SF} n=${N}..."
+
+        "${VT_ROOT}/scripts/run_py_script.sh" \
+            -p "${VT_ROOT}/benchmark_reporting_tools/post_results.py" \
+            "${OUTPUT_DIR}" \
+            --sku-name "${SKU_NAME}" \
+            --storage-configuration-name "${STORAGE_CONFIGURATION_NAME}" \
+            --cache-state "${CACHE_STATE}" \
+            --benchmark-name "tpch-rs-${SF}" \
+            --velox-branch "${VELOX_BRANCH}" \
+            --presto-branch "${PRESTO_BRANCH}" \
+            --velox-repo "${VELOX_REPO}" \
+            --presto-repo "${PRESTO_REPO}"
+
+        echo ""
+        echo "Done: sf=${SF} n=${N}"
+
+        if (( run < total )); then
+            echo "Waiting ${INTER_RUN_SLEEP}s for worker UCX ports to be released before next run..."
+            sleep "${INTER_RUN_SLEEP}"
+        fi
+        echo ""
+    done
+done
+
+echo "========================================"
+echo "Sweep complete: ${total} runs finished."
+echo "========================================"
diff --git a/presto/slurm/presto-nvl72/run_interactive.sh b/presto/slurm/presto-nvl72/run_interactive.sh
new file mode 100755
index 00000000..21d841fb
--- /dev/null
+++ b/presto/slurm/presto-nvl72/run_interactive.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Interactive shell on a compute node with a container image.
+#
+# By default Slurm picks any available node in the partition.  Set NODELIST
+# to pin to a specific node or a range.
+# IMAGE, GRES, and TIME_LIMIT are also overridable via environment.
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/defaults.env"
+
+: "${IMAGE:=${IMAGE_DIR}/presto-native-worker-gpu.sqsh}"
+: "${GRES:=gpu:4}"
+: "${TIME_LIMIT:=01:00:00}"
+
+NODELIST_ARG=()
+if [[ -n "${NODELIST:-}" ]]; then
+    NODELIST_ARG=(--nodelist="${NODELIST}")
+fi
+
+srun --nodes=1 \
+     "${NODELIST_ARG[@]}" \
+     --ntasks-per-node=1 \
+     --gres="${GRES}" \
+     --exclusive \
+     --time="${TIME_LIMIT}" \
+     --container-image="${IMAGE}" \
+     --container-mounts="${HOME}:${HOME},/scratch:/scratch" \
+     --container-remap-root \
+     --container-writable \
+     --pty bash
diff --git a/presto/slurm/presto-nvl72/run_multiple.sh b/presto/slurm/presto-nvl72/run_multiple.sh
deleted file mode 100755
index 88339a82..00000000
--- a/presto/slurm/presto-nvl72/run_multiple.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-NUM_ITERATIONS=2
-while [[ $# -gt 0 ]]; do
-    case "$1" in
-	 -n|--nodes)
-            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
-                NODES_COUNT="$2"
-                shift 2
-            else
-		echo "Error: -n|--nodes requires a set of comma separated values.  E.g. (2,4,8)"
-                echo "Usage: $0 -n|--nodes <count1,count2> -s|--scale-factor <sf1,sf2> -w <image_name> -c <image_name> [additional sbatch options]"
-                exit 1
-            fi
-            ;;
-        -s|--scale-factor)
-            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
-                SCALE_FACTOR="$2"
-                shift 2
-            else
-		echo "Error: -s|--scale-factor requires a set of comma separated values. E.g. (1000,3000)"
-		echo "Usage: $0 -n|--nodes <count1,count2> -s|--scale-factor <sf1,sf2> -w <image_name> -c <image_name> [additional sbatch options]"
-                exit 1
-            fi
-            ;;
-        -i|--iterations)
-            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
-                NUM_ITERATIONS="$2"
-                shift 2
-            else
-                echo "Error: -i|--iterations requires a value"
-		echo "Usage: $0 -n|--nodes <count1,count2> -s|--scale-factor <sf1,sf2> -w <image_name> -c <image_name> [additional sbatch options]"
-                exit 1
-            fi
-            ;;
-	-w|--worker-image)
-            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
-                WORKER_IMAGE="$2"
-                shift 2
-            else
-                echo "Error: -w|--worker-image requires a value"
-		echo "Usage: $0 -n|--nodes <count1,count2> -s|--scale-factor <sf1,sf2> -w <image_name> -c <image_name> [additional sbatch options]"
-                exit 1
-            fi
-            ;;
-	-c|--coord-image)
-            if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then
-                COORD_IMAGE="$2"
-                shift 2
-            else
-                echo "Error: -c|--coord-image requires a value"
-		echo "Usage: $0 -n|--nodes <count1,count2> -s|--scale-factor <sf1,sf2> -w <image_name> -c <image_name> [additional sbatch options]"
-                exit 1
-            fi
-            ;;
-        *)
-            EXTRA_ARGS+=("$1")
-            shift
-            ;;
-    esac
-done
-
-if [[ -z "${NODES_COUNT}" ]]; then
-    echo "Error: -n|--nodes is required"
-    exit 1
-fi
-if [[ -z "${SCALE_FACTOR}" ]]; then
-    echo "Error: -s|--scale-factor is required"
-    exit 1
-fi
-if [[ -z "${WORKER_IMAGE}" ]]; then
-    echo "Error: -w|--worker-image is required"
-    exit 1
-fi
-if [[ -z "${COORD_IMAGE}" ]]; then
-    echo "Error: -c|--coord-image is required"
-    exit 1
-fi
-
-
-mkdir -p kept_results
-
-IFS=',' read -ra NODES_ARRAY <<< "$NODES_COUNT"
-IFS=',' read -ra SF_ARRAY <<< "$SCALE_FACTOR"
-for s in "${SF_ARRAY[@]}"; do
-    for n in "${NODES_ARRAY[@]}"; do
-        ./launch-run.sh -s $s -n $n -i $NUM_ITERATIONS -w $WORKER_IMAGE -c $COORD_IMAGE
-        cli_log="$(ls -t logs/cli_*.log 2>/dev/null | head -1)"
-        [ -z "${cli_log}" ] && cli_log="logs/cli.log"
-        cp "${cli_log}" kept_results/${n}N-${s}SF-summary.txt
-    done
-done
diff --git a/scripts/py_env_functions.sh b/scripts/py_env_functions.sh
index 8d5dac5d..89fa6c6a 100755
--- a/scripts/py_env_functions.sh
+++ b/scripts/py_env_functions.sh
@@ -86,7 +86,9 @@ function init_python_virtual_env() {
     echo "Creating virtual environment using conda"
     # Downgrade the version of python in conda because there is an issue with conda's python 3.12 on ARM.
     # Installing python3.12 here leads to: "ModuleNotFoundError: No module named '_posixsubprocess'"
-    conda create -q -y --prefix "$venv_dir" python=3.11 > /dev/null
+    # Pin ncurses<6.5 on ARM (aarch64): ncurses-6.5 has a terminfo symlink
+    # conflict that makes `conda create` fail with EEXIST on nvl72 nodes.
+    conda create -q -y --prefix "$venv_dir" python=3.11 "ncurses<6.5" > /dev/null
   fi
 
   activate_python_virtual_env $venv_dir