From 0946333cb4a4b4c2bb0329e41c5c3931b8eb2489 Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Fri, 21 Mar 2025 12:14:50 -0500
Subject: [PATCH 1/8] Add local data support

---
 data.json                             |  3 ---
 models.json                           | 33 ++++-----------------------
 scripts/vllm/vllm_benchmark_report.sh |  5 ++++
 3 files changed, 10 insertions(+), 31 deletions(-)
 delete mode 100644 data.json

diff --git a/data.json b/data.json
deleted file mode 100644
index 6abaa86..0000000
--- a/data.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "huggingface": {}
-}
diff --git a/models.json b/models.json
index fa50cbd..775b560 100644
--- a/models.json
+++ b/models.json
@@ -33,7 +33,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -51,7 +50,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -69,7 +67,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -87,7 +84,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -105,7 +101,7 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
+        "data": "llama-2-7b-chat-hf",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -123,7 +119,7 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
+        "data": "llama-2-70b-chat-hf",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -141,7 +137,7 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
+        "data": "Mixtral-8x7B-Instruct-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -159,7 +155,7 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
+        "data": "Mixtral-8x22B-Instruct-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -177,7 +173,7 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
+        "data": "mistral-7b-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -195,7 +191,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -213,7 +208,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -231,7 +225,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -249,7 +242,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -267,7 +259,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -285,7 +276,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -303,7 +293,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -321,7 +310,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -339,7 +327,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -357,7 +344,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -375,7 +361,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -393,7 +378,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -411,7 +395,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -429,7 +412,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -447,7 +429,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -465,7 +446,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -483,7 +463,6 @@
         "url": "",
         "dockerfile": "docker/pytorch_train",
         "scripts": "scripts/pytorch_train/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -500,7 +479,6 @@
         "url": "",
         "dockerfile": "docker/pytorch_train",
         "scripts": "scripts/pytorch_train/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -517,7 +495,6 @@
         "url": "",
         "dockerfile": "docker/pytorch_train",
         "scripts": "scripts/pytorch_train/run.sh",
-        "data": "huggingface",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh
index c3b8b21..e4c2dd6 100755
--- a/scripts/vllm/vllm_benchmark_report.sh
+++ b/scripts/vllm/vllm_benchmark_report.sh
@@ -50,6 +50,11 @@ model_org_name=(${model//// })
 model_name=${model_org_name[1]}
 tp=$numgpu
 
+# Use local data if present
+if [ -n $MAD_DATAHOME ]; then
+    model=$MAD_DATAHOME
+fi
+
 # perf configuration
 export VLLM_USE_TRITON_FLASH_ATTN=0
 export NCCL_MIN_NCHANNELS=112

From 9b1aa7724280076a8e2a22741ac9c58fd9484379 Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Fri, 21 Mar 2025 16:08:25 -0500
Subject: [PATCH 2/8] Remove data field

---
 models.json | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/models.json b/models.json
index 775b560..1e9b501 100644
--- a/models.json
+++ b/models.json
@@ -101,7 +101,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "llama-2-7b-chat-hf",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -119,7 +118,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "llama-2-70b-chat-hf",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -137,7 +135,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "Mixtral-8x7B-Instruct-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -155,7 +152,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "Mixtral-8x22B-Instruct-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
@@ -173,7 +169,6 @@
         "url": "",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
-        "data": "mistral-7b-v0.1",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",

From 1922826f36b1b82eb178f08bf8d61dc16ca685ea Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Mon, 7 Apr 2025 12:40:58 -0500
Subject: [PATCH 3/8] Add echo

---
 scripts/vllm/vllm_benchmark_report.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh
index e4c2dd6..f18137a 100755
--- a/scripts/vllm/vllm_benchmark_report.sh
+++ b/scripts/vllm/vllm_benchmark_report.sh
@@ -52,6 +52,7 @@ tp=$numgpu
 
 # Use local data if present
 if [ -n $MAD_DATAHOME ]; then
+    echo "Using data from MAD_DATAHOME"
     model=$MAD_DATAHOME
 fi
 

From ab7de7b56585f52a319a687cadb35594034f85f7 Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Mon, 7 Apr 2025 12:42:51 -0500
Subject: [PATCH 4/8] Add echo

---
 scripts/vllm/vllm_benchmark_report.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh
index f18137a..1f39b2d 100755
--- a/scripts/vllm/vllm_benchmark_report.sh
+++ b/scripts/vllm/vllm_benchmark_report.sh
@@ -52,7 +52,7 @@ tp=$numgpu
 
 # Use local data if present
 if [ -n $MAD_DATAHOME ]; then
-    echo "Using data from MAD_DATAHOME"
+    echo "Using data from $MAD_DATAHOME"
     model=$MAD_DATAHOME
 fi
 

From 9c1d328f75cf2d887112a97b4feb2da7831eaa2b Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Mon, 7 Apr 2025 12:59:11 -0500
Subject: [PATCH 5/8] Add echo

---
 scripts/vllm/vllm_benchmark_report.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh
index 1f39b2d..bd5f5d2 100755
--- a/scripts/vllm/vllm_benchmark_report.sh
+++ b/scripts/vllm/vllm_benchmark_report.sh
@@ -51,7 +51,7 @@ model_name=${model_org_name[1]}
 tp=$numgpu
 
 # Use local data if present
-if [ -n $MAD_DATAHOME ]; then
+if [ -n "$MAD_DATAHOME" ]; then
     echo "Using data from $MAD_DATAHOME"
     model=$MAD_DATAHOME
 fi

From 99cf154c24041dbc3aca22712336994889b4afad Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Mon, 28 Apr 2025 10:43:05 -0500
Subject: [PATCH 6/8] Add llama2 7b and 70b nas for testing

---
 models.json | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/models.json b/models.json
index c9d75ac..89d7c4e 100644
--- a/models.json
+++ b/models.json
@@ -99,6 +99,7 @@
     {
         "name": "pyt_vllm_llama-2-7b",
         "url": "",
+        "data": "llama-2-7b-chat-hf",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
@@ -116,6 +117,7 @@
     {
         "name": "pyt_vllm_llama-2-70b",
         "url": "",
+        "data": "llama-2-70b-chat-hf",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",

From b861623eeeb15f0561271226ade9fecbab29fa03 Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Thu, 8 May 2025 16:15:16 -0500
Subject: [PATCH 7/8] Update Llama and Mistral data

---
 models.json | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/models.json b/models.json
index ba686a5..84409e3 100644
--- a/models.json
+++ b/models.json
@@ -29,112 +29,117 @@
         "args": ""
     },
     {
-        "name": "pyt_vllm_llama-3.1-8b",
+        "name": "pyt_vllm_llama-2-7b",
         "url": "",
+        "data": "meta-llama/Llama-2-7b-chat-hf",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-3.1-8B-Instruct.csv",
+        "multiple_results": "perf_Llama-2-7b-chat-hf.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-3.1-8B-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-2-7b-chat-hf --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
     },
     {
-        "name": "pyt_vllm_llama-3.1-70b",
+        "name": "pyt_vllm_llama-2-70b",
         "url": "",
+        "data": "meta-llama/Llama-2-70b-chat-hf",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-3.1-70B-Instruct.csv",
+        "multiple_results": "perf_Llama-2-70b-chat-hf.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-3.1-70B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-2-70b-chat-hf --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
     },
     {
-        "name": "pyt_vllm_llama-3.1-405b",
+        "name": "pyt_vllm_llama-3.1-8b",
         "url": "",
+        "data": "meta-llama/Llama-3.1-8B-Instruct",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-3.1-405B-Instruct.csv",
+        "multiple_results": "perf_Llama-3.1-8B-Instruct.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-3.1-405B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-3.1-8B-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
     },
     {
-        "name": "pyt_vllm_llama-3.2-11b-vision-instruct",
+        "name": "pyt_vllm_llama-3.1-70b",
         "url": "",
+        "data": "meta-llama/Llama-3.1-70B-Instruct",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-3.2-11B-Vision-Instruct.csv",
+        "multiple_results": "perf_Llama-3.1-70B-Instruct.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-3.2-11B-Vision-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-3.1-70B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
     },
     {
-        "name": "pyt_vllm_llama-2-7b",
+        "name": "pyt_vllm_llama-3.1-405b",
         "url": "",
-        "data": "llama-2-7b-chat-hf",
+        "data": "meta-llama/Llama-3.1-405B-Instruct",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-2-7b-chat-hf.csv",
+        "multiple_results": "perf_Llama-3.1-405B-Instruct.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-2-7b-chat-hf --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-3.1-405B-Instruct --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
     },
     {
-        "name": "pyt_vllm_llama-2-70b",
+        "name": "pyt_vllm_llama-3.2-11b-vision-instruct",
         "url": "",
-        "data": "llama-2-70b-chat-hf",
+        "data": "meta-llama/Llama-3.2-11B-Vision-Instruct",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
         "owner": "mad.support@amd.com",
         "training_precision": "",
-        "multiple_results": "perf_Llama-2-70b-chat-hf.csv",
+        "multiple_results": "perf_Llama-3.2-11B-Vision-Instruct.csv",
         "tags": [
             "pyt",
             "vllm"
         ],
         "timeout": -1,
         "args":
-            "--model_repo meta-llama/Llama-2-70b-chat-hf --test_option latency,throughput --num_gpu 8 --datatype float16 --tunableop off"
+            "--model_repo meta-llama/Llama-3.2-11B-Vision-Instruct --test_option latency,throughput --num_gpu 1 --datatype float16 --tunableop off"
     },
     {
         "name": "pyt_vllm_mixtral-8x7b",
         "url": "",
+        "data": "mistralai/Mixtral-8x7B-Instruct-v0.1",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
@@ -152,6 +157,7 @@
     {
         "name": "pyt_vllm_mixtral-8x22b",
         "url": "",
+        "data": "mistralai/Mixtral-8x22B-Instruct-v0.1",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
@@ -169,6 +175,7 @@
     {
         "name": "pyt_vllm_mistral-7b",
         "url": "",
+        "data": "mistralai/Mistral-7B-Instruct-v0.1",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",

From 7c9d80fa498105833a142067dbb134250055a9e0 Mon Sep 17 00:00:00 2001
From: Rohan138
Date: Fri, 9 May 2025 01:08:57 -0500
Subject: [PATCH 8/8] Add entries for llama FP8

---
 models.json | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/models.json b/models.json
index 84409e3..7b73ddd 100644
--- a/models.json
+++ b/models.json
@@ -295,6 +295,7 @@
     {
         "name": "pyt_vllm_llama-3.1-8b_fp8",
         "url": "",
+        "data": "amd/Llama-3.1-8B-Instruct-FP8-KV",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
@@ -312,6 +313,7 @@
     {
         "name": "pyt_vllm_llama-3.1-70b_fp8",
         "url": "",
+        "data": "amd/Llama-3.1-70B-Instruct-FP8-KV",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",
@@ -329,6 +331,7 @@
     {
         "name": "pyt_vllm_llama-3.1-405b_fp8",
         "url": "",
+        "data": "amd/Llama-3.1-405B-Instruct-FP8-KV",
         "dockerfile": "docker/pyt_vllm",
         "scripts": "scripts/vllm/run.sh",
         "n_gpus": "-1",