From 5b37e10e58327734591b1edb34f4b446b214046b Mon Sep 17 00:00:00 2001 From: angelayi Date: Fri, 13 Mar 2026 16:09:23 -0700 Subject: [PATCH] Add multi-compile-call support to vLLM summary Refactor VllmState from flat fields to a Vec structure, where each compile call tracks its own config, subgraphs, and artifacts. The summary template now loops over compile calls with collapsible sections when multiple calls are present, and displays shared config separately when all calls use identical settings. [ghstack-poisoned] --- src/vllm/parsers.rs | 370 +++++++++++++++++++++++++++++++----------- src/vllm/templates.rs | 125 +++++++++++--- src/vllm/types.rs | 36 +++- 3 files changed, 406 insertions(+), 125 deletions(-) diff --git a/src/vllm/parsers.rs b/src/vllm/parsers.rs index 6e32b2d..8a00eea 100644 --- a/src/vllm/parsers.rs +++ b/src/vllm/parsers.rs @@ -3,8 +3,9 @@ use crate::templates::TEMPLATE_QUERY_PARAM_SCRIPT; use crate::types::{CompileId, Envelope}; use super::types::{ - ArtifactInfo, VllmCompilationConfig, VllmCompileRangeGroup, VllmDiffContext, - VllmSubgraphInfo, VllmSubgraphWithArtifacts, VllmSummaryContext, + ArtifactInfo, VllmCompileCall, VllmCompileCallContext, VllmCompileRangeGroup, + VllmCompilationConfig, VllmDiffContext, VllmSubgraphInfo, VllmSubgraphWithArtifacts, + VllmSummaryContext, }; use std::cell::RefCell; @@ -14,11 +15,11 @@ use tinytemplate::TinyTemplate; #[derive(Debug, Default)] pub struct VllmState { - pub config: RefCell>, - pub piecewise_graph_file: RefCell>, - pub subgraphs: RefCell>, - pub pre_subgraph_artifacts: RefCell>, + pub compile_calls: RefCell>, pub has_vllm_artifacts: RefCell, + /// Buffered compile event type from vllm_compile_event, to be attached to + /// the next compile call (since the event arrives before vllm_compilation_config). + pending_compile_event_type: RefCell>, } impl VllmState { @@ -30,8 +31,51 @@ impl VllmState { *self.has_vllm_artifacts.borrow() } - // Add artifact to current subgraph, or pre_subgraph_artifacts if no subgraph yet + /// Push a new compile call when a `vllm_compilation_config` is seen. + /// If the last call has no config yet (created by ensure_compile_call for early artifacts), + /// populate it instead of creating a new one. + /// Consumes any pending compile event type from a prior vllm_compile_event. + pub fn push_compile_call(&self, config: VllmCompilationConfig) { + let pending_event = self.pending_compile_event_type.borrow_mut().take(); + let mut calls = self.compile_calls.borrow_mut(); + if let Some(last) = calls.last_mut() { + if last.config.is_none() { + last.config = Some(config); + if last.compile_event_type.is_none() { + last.compile_event_type = pending_event; + } + return; + } + } + let index = calls.len(); + calls.push(VllmCompileCall { + index, + config: Some(config), + compile_event_type: pending_event, + ..Default::default() + }); + } + + /// Buffer a compile event type to be attached to the next compile call. + /// The event always arrives before its corresponding vllm_compilation_config, + /// so we buffer it until push_compile_call consumes it. + pub fn set_pending_compile_event(&self, event_type: String) { + *self.pending_compile_event_type.borrow_mut() = Some(event_type); + } + + /// Ensure at least one compile call exists. Creates a default one if empty. + fn ensure_compile_call(&self) { + let mut calls = self.compile_calls.borrow_mut(); + if calls.is_empty() { + calls.push(VllmCompileCall::default()); + } + } + + /// Add artifact to the current (last) compile call's last subgraph, + /// or to pre_subgraph_artifacts if no subgraph exists yet. pub fn add_artifact(&self, filename: &std::path::Path, suffix: String) { + self.ensure_compile_call(); + let url = filename.to_string_lossy().to_string(); let name = filename .file_stem() @@ -39,85 +83,87 @@ impl VllmState { .map(|s| s.to_string()) .unwrap_or_else(|| url.clone()); - // Track piecewise split graph file for linking in summary + // Track piecewise split graph file on the current compile call if name.starts_with("vllm_piecewise_split_graph") { - *self.piecewise_graph_file.borrow_mut() = Some(url.clone()); + let mut calls = self.compile_calls.borrow_mut(); + if let Some(last) = calls.last_mut() { + last.piecewise_graph_file = Some(url.clone()); + } } let artifact = ArtifactInfo { name, url, suffix }; - let mut subgraphs = self.subgraphs.borrow_mut(); - if let Some(last) = subgraphs.last_mut() { - last.artifacts.push(artifact); - } else { - self.pre_subgraph_artifacts.borrow_mut().push(artifact); + let mut calls = self.compile_calls.borrow_mut(); + if let Some(call) = calls.last_mut() { + if let Some(last_subgraph) = call.subgraphs.last_mut() { + last_subgraph.artifacts.push(artifact); + } else { + call.pre_subgraph_artifacts.push(artifact); + } } } +} - // Group subgraphs by compile range/size for hierarchical display - pub fn build_compile_range_groups(&self) -> Vec { - use indexmap::IndexMap; - - let subgraphs = self.subgraphs.borrow(); - let mut groups: IndexMap> = IndexMap::new(); - - for subgraph in subgraphs.iter() { - let size_or_range = subgraph.size_or_range(); - let (pass_artifacts, artifacts): (Vec<_>, Vec<_>) = subgraph - .artifacts - .iter() - .cloned() - .partition(|a| a.name.contains("vllm_post_grad.")); - let artifact_count = artifacts.len(); - let pass_artifact_count = pass_artifacts.len(); - let has_pass_artifacts = pass_artifact_count > 0; - groups - .entry(size_or_range) - .or_default() - .push(VllmSubgraphWithArtifacts { - submod_name: subgraph.display_submod_name(), - artifacts, - artifact_count, - pass_artifacts, - pass_artifact_count, - has_pass_artifacts, - }); - } +// Group subgraphs by compile range/size for hierarchical display +fn build_compile_range_groups(call: &VllmCompileCall) -> Vec { + use indexmap::IndexMap; - groups - .into_iter() - .map(|(size_or_range, submods)| VllmCompileRangeGroup { - size_or_range, - submod_count: submods.len(), - submods, - }) - .collect() - } + let mut groups: IndexMap> = IndexMap::new(); - // Get pattern artifacts from pre_subgraph_artifacts - pub fn build_pattern_artifacts(&self) -> Vec { - self.pre_subgraph_artifacts - .borrow() + for subgraph in call.subgraphs.iter() { + let size_or_range = subgraph.size_or_range(); + let (pass_artifacts, artifacts): (Vec<_>, Vec<_>) = subgraph + .artifacts .iter() - .filter(|a| a.name.starts_with("vllm_patterns.")) .cloned() - .collect() + .partition(|a| a.name.contains("vllm_post_grad.")); + let artifact_count = artifacts.len(); + let pass_artifact_count = pass_artifacts.len(); + let has_pass_artifacts = pass_artifact_count > 0; + groups + .entry(size_or_range) + .or_default() + .push(VllmSubgraphWithArtifacts { + submod_name: subgraph.display_submod_name(), + artifacts, + artifact_count, + pass_artifacts, + pass_artifact_count, + has_pass_artifacts, + }); } - // Get dynamo artifacts from pre_subgraph_artifacts - pub fn build_dynamo_artifacts(&self) -> Vec { - let dynamo_names = [ - "dynamo_side_effects", - "dynamo_output_graph", - "dynamo_cpp_guards_str", - "compilation_metrics", - ]; - self.pre_subgraph_artifacts - .borrow() - .iter() - .filter(|a| dynamo_names.iter().any(|name| a.name.starts_with(name))) - .cloned() - .collect() - } + groups + .into_iter() + .map(|(size_or_range, submods)| VllmCompileRangeGroup { + size_or_range, + submod_count: submods.len(), + submods, + }) + .collect() +} + +// Get dynamo artifacts from pre_subgraph_artifacts +fn build_dynamo_artifacts(call: &VllmCompileCall) -> Vec { + let dynamo_names = [ + "dynamo_side_effects", + "dynamo_output_graph", + "dynamo_cpp_guards_str", + "compilation_metrics", + ]; + call.pre_subgraph_artifacts + .iter() + .filter(|a| dynamo_names.iter().any(|name| a.name.starts_with(name))) + .cloned() + .collect() +} + +// Get pattern artifacts from pre_subgraph_artifacts +fn build_pattern_artifacts(call: &VllmCompileCall) -> Vec { + call.pre_subgraph_artifacts + .iter() + .filter(|a| a.name.starts_with("vllm_patterns.")) + .cloned() + .collect() } // Parses vllm_compilation_config artifacts. @@ -155,7 +201,7 @@ impl StructuredLogParser for VllmCompilationConfigParser { payload: &str, ) -> anyhow::Result { if let Ok(config) = serde_json::from_str::(payload) { - *self.state.config.borrow_mut() = Some(config); + self.state.push_compile_call(config); *self.state.has_vllm_artifacts.borrow_mut() = true; } @@ -167,8 +213,55 @@ impl StructuredLogParser for VllmCompilationConfigParser { } } +// Parses vllm_compile_event artifacts emitted by vLLM's @support_torch_compile decorator. +// Stores the event type (aot_cache_hit / fresh_compile) on the current compile call. +pub struct VllmCompileEventParser { + state: Rc, +} + +impl VllmCompileEventParser { + pub fn new(state: Rc) -> Self { + Self { state } + } +} + +#[derive(serde::Deserialize)] +struct VllmCompileEvent { + #[serde(rename = "type")] + event_type: String, +} + +impl StructuredLogParser for VllmCompileEventParser { + fn name(&self) -> &'static str { + "vllm_compile_event" + } + + fn get_metadata<'e>(&self, e: &'e Envelope) -> Option> { + if let Some(artifact) = &e.artifact { + if artifact.name == "vllm_compile_event" { + return Some(Metadata::Artifact(artifact)); + } + } + None + } + + fn parse<'e>( + &self, + _lineno: usize, + _metadata: Metadata<'e>, + _rank: Option, + _compile_id: &Option, + payload: &str, + ) -> anyhow::Result { + if let Ok(event) = serde_json::from_str::(payload) { + self.state.set_pending_compile_event(event.event_type); + } + Ok(Vec::new()) + } +} + // Parses vllm_piecewise_compile_start artifacts and vllm_subgraph_*/vllm_submod_* graph dumps. -// On compile_start: pushes new VllmSubgraphInfo to state.subgraphs (subsequent artifacts attach here). +// On compile_start: pushes new VllmSubgraphInfo to the current compile call's subgraphs. // On graph_dump: adds artifact to current subgraph and outputs the graph file. pub struct VllmPiecewiseCompileParser { state: Rc, @@ -214,7 +307,11 @@ impl StructuredLogParser for VllmPiecewiseCompileParser { match metadata { Metadata::Artifact(_artifact) => { if let Ok(subgraph) = serde_json::from_str::(payload) { - self.state.subgraphs.borrow_mut().push(subgraph); + self.state.ensure_compile_call(); + let mut calls = self.state.compile_calls.borrow_mut(); + if let Some(call) = calls.last_mut() { + call.subgraphs.push(subgraph); + } } Ok(Vec::new()) } @@ -271,13 +368,13 @@ impl StructuredLogParser for VllmPiecewiseSplitGraphParser { } } -// Parses two kinds of log entries to produce per-pass diff pages: +// Parses three kinds of log entries to produce per-pass diff pages: // // 1. "before_post_grad_graph" artifact — the graph before any passes run. // Stored as the diff baseline; no file output (ArtifactParser handles that). // -// 2. "vllm_patterns." graph dump — pattern matcher patterns. -// Output as a standalone .py file (no diffing). +// 2. "vllm_patterns." graph dump — pattern matcher source for a pass. +// Output as a standalone .py file (linked from the summary page). // // 3. "vllm_post_grad.." graph dump — the graph after a pass. // Diffed against `previous_payload` to produce a side-by-side HTML diff, @@ -287,6 +384,8 @@ pub struct VllmPostGradPassDiffParser { // The graph payload from the previous pass (or before_post_grad_graph), // used as the "before" side of the next diff. previous_payload: RefCell>, + /// Tracks which compile call we're in, to reset baseline on new compile call. + current_compile_call_index: RefCell, } impl VllmPostGradPassDiffParser { @@ -294,6 +393,18 @@ impl VllmPostGradPassDiffParser { Self { state, previous_payload: RefCell::new(None), + current_compile_call_index: RefCell::new(0), + } + } + + /// Check if we've moved to a new compile call, and reset baseline if so. + fn check_compile_call_change(&self) { + let calls = self.state.compile_calls.borrow(); + let current_idx = if calls.is_empty() { 0 } else { calls.len() - 1 }; + let mut tracked_idx = self.current_compile_call_index.borrow_mut(); + if current_idx != *tracked_idx { + *tracked_idx = current_idx; + *self.previous_payload.borrow_mut() = None; } } @@ -450,6 +561,8 @@ impl StructuredLogParser for VllmPostGradPassDiffParser { compile_id: &Option, payload: &str, ) -> anyhow::Result { + self.check_compile_call_change(); + // before_post_grad_graph (artifact): seed baseline for first pass diff. // Don't output a file — the default ArtifactParser handles that. if matches!(metadata, Metadata::Artifact(a) if a.name == "before_post_grad_graph") { @@ -464,7 +577,7 @@ impl StructuredLogParser for VllmPostGradPassDiffParser { *self.state.has_vllm_artifacts.borrow_mut() = true; - // Handle vllm_patterns.* graph dumps: output as standalone .py file + // Handle vllm_patterns.* graph dumps: output as a standalone .py file if graph_dump.name.starts_with("vllm_patterns.") { let filename = format!("{}.py", graph_dump.name); let f = build_file_path(&filename, lineno, compile_id); @@ -514,6 +627,7 @@ impl StructuredLogParser for VllmPostGradPassDiffParser { pub fn vllm_parsers_with_state(state: Rc) -> Vec> { vec![ + Box::new(VllmCompileEventParser::new(state.clone())), Box::new(VllmCompilationConfigParser::new(state.clone())), Box::new(VllmPiecewiseSplitGraphParser::new(state.clone())), Box::new(VllmPiecewiseCompileParser::new(state.clone())), @@ -536,28 +650,90 @@ pub fn generate_vllm_summary( tt: &TinyTemplate, custom_header_html: &str, ) -> anyhow::Result { - let config = state.config.borrow().as_ref().map(|c| normalize_config(c)).unwrap_or_default(); - let dynamo_artifacts = state.build_dynamo_artifacts(); - let has_dynamo_artifacts = !dynamo_artifacts.is_empty(); - let pattern_artifacts = state.build_pattern_artifacts(); - let has_pattern_artifacts = !pattern_artifacts.is_empty(); - let piecewise_graph_file = state.piecewise_graph_file.borrow().clone(); - let has_piecewise = piecewise_graph_file.is_some(); - let compile_range_groups = state.build_compile_range_groups(); + let calls = state.compile_calls.borrow(); + + // Build per-call contexts + let mut compile_call_contexts: Vec = Vec::new(); + for (i, call) in calls.iter().enumerate() { + let config = call + .config + .as_ref() + .map(|c| normalize_config(c)) + .unwrap_or_default(); + let has_config = call.config.is_some(); + let dynamo_artifacts = build_dynamo_artifacts(call); + let has_dynamo_artifacts = !dynamo_artifacts.is_empty(); + let pattern_artifacts = build_pattern_artifacts(call); + let has_pattern_artifacts = !pattern_artifacts.is_empty(); + let has_piecewise = call.piecewise_graph_file.is_some(); + let compile_range_groups = build_compile_range_groups(call); + + // Label: use config prefix if available + let label = call + .config + .as_ref() + .and_then(|c| c.prefix.clone()) + .unwrap_or_default(); + + let is_cache_hit = call + .compile_event_type + .as_deref() + == Some("aot_cache_hit"); + + compile_call_contexts.push(VllmCompileCallContext { + display_index: i + 1, + label, + is_first: i == 0, + is_cache_hit, + has_config, + config, + dynamo_artifacts, + has_dynamo_artifacts, + pattern_artifacts, + has_pattern_artifacts, + piecewise_graph_file: call.piecewise_graph_file.clone(), + has_piecewise, + compile_range_groups, + }); + } + + let has_multiple_calls = compile_call_contexts.len() > 1; + + // Detect shared config: if all calls have the same config (by JSON equality) + let (has_shared_config, shared_config) = if has_multiple_calls { + let configs: Vec<_> = calls + .iter() + .filter_map(|c| c.config.as_ref()) + .collect(); + if configs.len() >= 2 { + let first_json = serde_json::to_string(&normalize_config(configs[0])).unwrap_or_default(); + let all_same = configs[1..] + .iter() + .all(|c| serde_json::to_string(&normalize_config(c)).unwrap_or_default() == first_json); + if all_same { + // Hide per-call config since they're all the same + for ctx in &mut compile_call_contexts { + ctx.has_config = false; + } + (true, normalize_config(configs[0])) + } else { + (false, VllmCompilationConfig::default()) + } + } else { + (false, VllmCompilationConfig::default()) + } + } else { + (false, VllmCompilationConfig::default()) + }; let context = VllmSummaryContext { css: super::templates::VLLM_CSS.to_string(), qps: TEMPLATE_QUERY_PARAM_SCRIPT.to_string(), custom_header_html: custom_header_html.to_string(), - has_config: state.config.borrow().is_some(), - config, - dynamo_artifacts, - has_dynamo_artifacts, - pattern_artifacts, - has_pattern_artifacts, - piecewise_graph_file, - has_piecewise, - compile_range_groups, + compile_calls: compile_call_contexts, + has_multiple_calls, + has_shared_config, + shared_config, }; Ok(tt.render("vllm_summary.html", &context)?) diff --git a/src/vllm/templates.rs b/src/vllm/templates.rs index 95bd720..9f7cd8c 100644 --- a/src/vllm/templates.rs +++ b/src/vllm/templates.rs @@ -214,6 +214,42 @@ pub const VLLM_SUMMARY_TEMPLATE: &str = r#" vLLM Compilation Summary @@ -223,30 +259,30 @@ pub const VLLM_SUMMARY_TEMPLATE: &str = r#"

vLLM Compilation Summary

- {{ if has_config }} -

Compilation Configuration

+ {{ if has_shared_config }} +

Compilation Configuration (shared)

Core Settings - - - - - - + + + + + +
Model{config.model}
Mode{config.mode}
Backend{config.backend}
Prefix{config.prefix}
Custom Ops{config.custom_ops}
Splitting Ops{config.splitting_ops}
Model{shared_config.model}
Mode{shared_config.mode}
Backend{shared_config.backend}
Prefix{shared_config.prefix}
Custom Ops{shared_config.custom_ops}
Splitting Ops{shared_config.splitting_ops}
Compile Settings - - - - - - - - + + + + + + + +
CUDAGraph Mode{config.cudagraph_mode}
Use Inductor Graph Partition{config.use_inductor_graph_partition}
Compile Sizes{config.compile_sizes}
Compile Ranges Endpoints{config.compile_ranges_split_points}
Inductor Passes{config.inductor_passes}
Enabled Passes{config.enabled_passes}
Dynamic Shapes Type{config.dynamic_shapes_type}
Dynamic Shapes Evaluate Guards{config.dynamic_shapes_evaluate_guards}
CUDAGraph Mode{shared_config.cudagraph_mode}
Use Inductor Graph Partition{shared_config.use_inductor_graph_partition}
Compile Sizes{shared_config.compile_sizes}
Compile Ranges Endpoints{shared_config.compile_ranges_split_points}
Inductor Passes{shared_config.inductor_passes}
Enabled Passes{shared_config.enabled_passes}
Dynamic Shapes Type{shared_config.dynamic_shapes_type}
Dynamic Shapes Evaluate Guards{shared_config.dynamic_shapes_evaluate_guards}
{{ endif }} @@ -256,40 +292,76 @@ pub const VLLM_SUMMARY_TEMPLATE: &str = r#" You can download and view them in a tool like Perfetto.

- {{ if has_dynamo_artifacts }} + {{ for call in compile_calls }} + {{ if has_multiple_calls }} +
+

Compile Call {call.display_index}{{ if call.label }}: {call.label}{{ endif }}{{ if call.is_cache_hit }} ✅{{ endif }}

+
+ {{ endif }} + + {{ if call.has_config }} +

Compilation Configuration

+
+ Core Settings + + + + + + + +
Model{call.config.model}
Mode{call.config.mode}
Backend{call.config.backend}
Prefix{call.config.prefix}
Custom Ops{call.config.custom_ops}
Splitting Ops{call.config.splitting_ops}
+
+
+ Compile Settings + + + + + + + + + +
CUDAGraph Mode{call.config.cudagraph_mode}
Use Inductor Graph Partition{call.config.use_inductor_graph_partition}
Compile Sizes{call.config.compile_sizes}
Compile Ranges Endpoints{call.config.compile_ranges_split_points}
Inductor Passes{call.config.inductor_passes}
Enabled Passes{call.config.enabled_passes}
Dynamic Shapes Type{call.config.dynamic_shapes_type}
Dynamic Shapes Evaluate Guards{call.config.dynamic_shapes_evaluate_guards}
+
+ {{ endif }} + + {{ if call.has_dynamo_artifacts }}

Dynamo Compilation

    - {{ for artifact in dynamo_artifacts }} + {{ for artifact in call.dynamo_artifacts }}
  • {artifact.name} {artifact.suffix}
  • {{ endfor }}
{{ endif }} - {{ if has_piecewise }} + {{ if call.has_piecewise }}

Piecewise Split Graph

{{ endif }} - {{ if has_pattern_artifacts }} + {{ if call.has_pattern_artifacts }}

Inductor Pass Patterns

    - {{ for artifact in pattern_artifacts }} + {{ for artifact in call.pattern_artifacts }}
  • {artifact.name} {artifact.suffix}
  • {{ endfor }}
{{ endif }} + {{ if call.compile_range_groups }}

Inductor Compilation

- {{ for group in compile_range_groups }} + {{ for group in call.compile_range_groups }}

{group.size_or_range}

@@ -327,6 +399,13 @@ pub const VLLM_SUMMARY_TEMPLATE: &str = r#"
{{ endfor }} + {{ endif }} + + {{ if has_multiple_calls }} + + + {{ endif }} + {{ endfor }} {qps | format_unescaped} diff --git a/src/vllm/types.rs b/src/vllm/types.rs index 30c9872..3e38935 100644 --- a/src/vllm/types.rs +++ b/src/vllm/types.rs @@ -54,13 +54,28 @@ impl VllmSubgraphInfo { } } +/// Represents a single torch.compile call within a vLLM run. +/// Each compile call starts with a `vllm_compilation_config` artifact. +#[derive(Debug, Default)] +pub struct VllmCompileCall { + pub index: usize, + pub config: Option, + pub piecewise_graph_file: Option, + pub subgraphs: Vec, + pub pre_subgraph_artifacts: Vec, + /// "aot_cache_hit" or "fresh_compile", from vllm_compile_event artifact + pub compile_event_type: Option, +} + +/// Template context for a single compile call. #[derive(Debug, Serialize)] -pub struct VllmSummaryContext { - pub css: String, - pub qps: String, - pub custom_header_html: String, - pub config: VllmCompilationConfig, +pub struct VllmCompileCallContext { + pub display_index: usize, + pub label: String, + pub is_first: bool, + pub is_cache_hit: bool, pub has_config: bool, + pub config: VllmCompilationConfig, pub dynamo_artifacts: Vec, pub has_dynamo_artifacts: bool, pub pattern_artifacts: Vec, @@ -70,6 +85,17 @@ pub struct VllmSummaryContext { pub compile_range_groups: Vec, } +#[derive(Debug, Serialize)] +pub struct VllmSummaryContext { + pub css: String, + pub qps: String, + pub custom_header_html: String, + pub compile_calls: Vec, + pub has_multiple_calls: bool, + pub has_shared_config: bool, + pub shared_config: VllmCompilationConfig, +} + #[derive(Debug, Serialize)] pub struct VllmDiffContext { pub css: String,