From 880c0feee34861ff330408dd1098c68b91006d28 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Jun 2026 04:35:29 +0000 Subject: [PATCH 1/3] Initial plan From 84ac5eb71d7ab64e5fb851f01bf1b52014399b0e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Jun 2026 04:37:09 +0000 Subject: [PATCH 2/3] Add E2E timing from submit_store_request to get_finished --- .../vllm/vllm_multi_process_adapter.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lmcache/integration/vllm/vllm_multi_process_adapter.py b/lmcache/integration/vllm/vllm_multi_process_adapter.py index 3f855c49e5..c4c2ea818e 100644 --- a/lmcache/integration/vllm/vllm_multi_process_adapter.py +++ b/lmcache/integration/vllm/vllm_multi_process_adapter.py @@ -7,6 +7,7 @@ import enum import os import threading +import time # Third Party import torch @@ -936,6 +937,10 @@ def __init__( # Prevents re-reporting the same ID after drain clears tracking sets. self._returned_finished: set[str] = set() + # Timestamps recorded when submit_store_request is called, used to + # measure E2E wall-clock time until the future is resolved. + self._store_submit_times: dict[str, float] = {} + self.model_name = model_name self.parallel_strategy = parallel_strategy @@ -1185,6 +1190,7 @@ def submit_store_request( ) self.store_futures[request_id] = future self.store_events[request_id] = event + self._store_submit_times[request_id] = time.perf_counter() @_lmcache_nvtx_annotate def submit_retrieve_request( @@ -1345,6 +1351,7 @@ def get_finished( self.retrieve_futures.clear() self.store_events.clear() self.retrieve_events.clear() + self._store_submit_times.clear() ret_stores = self._process_finished_stores( finished_stores, finished_req_ids_from_engine @@ -1363,6 +1370,15 @@ def get_finished( if not s_future.query(): continue + _t_done = time.perf_counter() + _t_submit = self._store_submit_times.pop(request_id, None) + if _t_submit is not None: + logger.info( + "[E2E-STORE] req=%s e2e=%.3f ms", + str(request_id), + (_t_done - _t_submit) * 1000, + ) + s_result = s_future.result() finished_stores.add(request_id) From 0d993fc13079c6fc0d6fd82727ff0ee4a90ec510 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Jun 2026 04:37:41 +0000 Subject: [PATCH 3/3] Remove redundant str() in E2E-STORE log call --- lmcache/integration/vllm/vllm_multi_process_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lmcache/integration/vllm/vllm_multi_process_adapter.py b/lmcache/integration/vllm/vllm_multi_process_adapter.py index c4c2ea818e..ff035dd392 100644 --- a/lmcache/integration/vllm/vllm_multi_process_adapter.py +++ b/lmcache/integration/vllm/vllm_multi_process_adapter.py @@ -1375,7 +1375,7 @@ def get_finished( if _t_submit is not None: logger.info( "[E2E-STORE] req=%s e2e=%.3f ms", - str(request_id), + request_id, (_t_done - _t_submit) * 1000, )