From 5b37e10e58327734591b1edb34f4b446b214046b Mon Sep 17 00:00:00 2001
From: angelayi
Date: Fri, 13 Mar 2026 16:09:23 -0700
Subject: [PATCH] Add multi-compile-call support to vLLM summary
Refactor VllmState from flat fields to a Vec<VllmCompileCall> structure,
where each compile call tracks its own config, subgraphs, and artifacts.
The summary template now loops over compile calls with collapsible
sections when multiple calls are present, and displays shared config
separately when all calls use identical settings.
[ghstack-poisoned]
---
src/vllm/parsers.rs | 370 +++++++++++++++++++++++++++++++-----------
src/vllm/templates.rs | 125 +++++++++++---
src/vllm/types.rs | 36 +++-
3 files changed, 406 insertions(+), 125 deletions(-)
diff --git a/src/vllm/parsers.rs b/src/vllm/parsers.rs
index 6e32b2d..8a00eea 100644
--- a/src/vllm/parsers.rs
+++ b/src/vllm/parsers.rs
@@ -3,8 +3,9 @@ use crate::templates::TEMPLATE_QUERY_PARAM_SCRIPT;
use crate::types::{CompileId, Envelope};
use super::types::{
- ArtifactInfo, VllmCompilationConfig, VllmCompileRangeGroup, VllmDiffContext,
- VllmSubgraphInfo, VllmSubgraphWithArtifacts, VllmSummaryContext,
+ ArtifactInfo, VllmCompileCall, VllmCompileCallContext, VllmCompileRangeGroup,
+ VllmCompilationConfig, VllmDiffContext, VllmSubgraphInfo, VllmSubgraphWithArtifacts,
+ VllmSummaryContext,
};
use std::cell::RefCell;
@@ -14,11 +15,11 @@ use tinytemplate::TinyTemplate;
#[derive(Debug, Default)]
pub struct VllmState {
- pub config: RefCell
- {{ if has_dynamo_artifacts }}
+ {{ for call in compile_calls }}
+ {{ if has_multiple_calls }}
+
+ Compile Call {call.display_index}{{ if call.label }}: {call.label}{{ endif }}{{ if call.is_cache_hit }} ✅{{ endif }}
+
+ {{ endif }}
+
+ {{ if call.has_config }}
+
Compilation Configuration
+
+ Core Settings
+
+ | Model | {call.config.model} |
+ | Mode | {call.config.mode} |
+ | Backend | {call.config.backend} |
+ | Prefix | {call.config.prefix} |
+ | Custom Ops | {call.config.custom_ops} |
+ | Splitting Ops | {call.config.splitting_ops} |
+
+
+
+ Compile Settings
+
+ | CUDAGraph Mode | {call.config.cudagraph_mode} |
+ | Use Inductor Graph Partition | {call.config.use_inductor_graph_partition} |
+ | Compile Sizes | {call.config.compile_sizes} |
+ | Compile Ranges Endpoints | {call.config.compile_ranges_split_points} |
+ | Inductor Passes | {call.config.inductor_passes} |
+ | Enabled Passes | {call.config.enabled_passes} |
+ | Dynamic Shapes Type | {call.config.dynamic_shapes_type} |
+ | Dynamic Shapes Evaluate Guards | {call.config.dynamic_shapes_evaluate_guards} |
+
+
+ {{ endif }}
+
+ {{ if call.has_dynamo_artifacts }}
Dynamo Compilation
- {{ for artifact in dynamo_artifacts }}
+ {{ for artifact in call.dynamo_artifacts }}
- {artifact.name} {artifact.suffix}
{{ endfor }}
{{ endif }}
- {{ if has_piecewise }}
+ {{ if call.has_piecewise }}
Piecewise Split Graph
{{ endif }}
- {{ if has_pattern_artifacts }}
+ {{ if call.has_pattern_artifacts }}
Inductor Pass Patterns
- {{ for artifact in pattern_artifacts }}
+ {{ for artifact in call.pattern_artifacts }}
- {artifact.name} {artifact.suffix}
{{ endfor }}
{{ endif }}
+ {{ if call.compile_range_groups }}
Inductor Compilation
- {{ for group in compile_range_groups }}
+ {{ for group in call.compile_range_groups }}
{group.size_or_range}
@@ -327,6 +399,13 @@ pub const VLLM_SUMMARY_TEMPLATE: &str = r#"
{{ endfor }}
+ {{ endif }}
+
+ {{ if has_multiple_calls }}
+
+
+ {{ endif }}
+ {{ endfor }}
{qps | format_unescaped}