From 567097c0360a2851ba043cc59c8424c3c527b312 Mon Sep 17 00:00:00 2001 From: Melancholy <1542983487@qq.com> Date: Wed, 20 May 2026 17:13:42 +0800 Subject: [PATCH] =?UTF-8?q?fix(trace):=20LLM=5FTTFT=20=E7=88=B6=E8=8A=82?= =?UTF-8?q?=E7=82=B9=E9=94=99=E8=AF=AF=20+=20=E5=89=8D=E7=AB=AF=E6=9C=AA?= =?UTF-8?q?=E6=98=A0=E5=B0=84=20ERROR/CANCELLED=20=E7=8A=B6=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 后端:span.detach() 延迟到 awaitFirstPacket 之后执行,使 llm-first-packet 节点的 parentNodeId 正确指向具体的 provider 流式节点,而非 llm-stream-routing。 前端:STATUS_COLORS 补充 error 和 cancelled 状态映射,防止 故障转移场景下节点渲染崩溃。 Co-Authored-By: Claude Opus 4.7 --- .../pages/admin/traces/RagTraceDetailPage.tsx | 8 +-- .../chat/AbstractOpenAIStyleChatClient.java | 54 ++++++++++++------- .../ragent/infra/chat/RoutingLLMService.java | 9 +++- .../infra/chat/StreamCancellationHandle.java | 8 +++ 4 files changed, 57 insertions(+), 22 deletions(-) diff --git a/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx b/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx index de60a5379..6906fcaa5 100644 --- a/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx +++ b/frontend/src/pages/admin/traces/RagTraceDetailPage.tsx @@ -59,18 +59,20 @@ const copyToClipboard = (text: string, label: string) => { // ============ 状态颜色 ============ -type StatusType = "success" | "failed" | "running" | "default"; +type StatusType = "success" | "failed" | "error" | "running" | "cancelled" | "default"; const STATUS_COLORS: Record = { success: { dot: "bg-emerald-500", bar: "bg-emerald-400" }, failed: { dot: "bg-red-500", bar: "bg-red-400" }, + error: { dot: "bg-red-500", bar: "bg-red-400" }, running: { dot: "bg-amber-500", bar: "bg-amber-400" }, + cancelled: { dot: "bg-slate-400", bar: "bg-slate-300" }, default: { dot: "bg-slate-300", bar: "bg-slate-300" } }; const getStatusColors = (status?: string | null) => { - const normalized = normalizeStatus(status) as StatusType | null; - return STATUS_COLORS[normalized || "default"]; + const normalized = normalizeStatus(status) as StatusType; + return STATUS_COLORS[normalized] ?? STATUS_COLORS.default; }; // ============ 子组件 ============ diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java index 22945a868..211628e36 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/AbstractOpenAIStyleChatClient.java @@ -144,25 +144,43 @@ protected StreamCancellationHandle doStreamChat(ChatRequest request, StreamCallb // 在调用线程开 stream span,使后续 first-packet 子节点能正确归属父节点; // 该 span 由 SSE 终态(onComplete / onError)或 cancel 时收尾,记录真实端到端耗时 + // 注意:此处不 detach,由调用方(RoutingLLMService)在 awaitFirstPacket 之后调 handle.detach() StreamSpan span = streamTraceSupport.beginStreamNode(provider() + "-stream-chat", "LLM_PROVIDER"); - StreamSpanCallback wrappedCallback; - try { - wrappedCallback = new StreamSpanCallback(callback, span); - StreamCancellationHandle inner = StreamAsyncExecutor.submit( - modelStreamExecutor, - call, - wrappedCallback, - cancelled -> doStream(call, wrappedCallback, cancelled, reasoningEnabled) - ); - return () -> { - try { - inner.cancel(); - } finally { - wrappedCallback.onCancel(); - } - }; - } finally { - // 同步部分结束:把节点从当前线程的 NODE_STACK 弹出,避免污染兄弟节点的父节点链 + StreamSpanCallback wrappedCallback = new StreamSpanCallback(callback, span); + StreamCancellationHandle inner = StreamAsyncExecutor.submit( + modelStreamExecutor, + call, + wrappedCallback, + cancelled -> doStream(call, wrappedCallback, cancelled, reasoningEnabled) + ); + return new StreamChatHandle(() -> { + try { + inner.cancel(); + } finally { + wrappedCallback.onCancel(); + } + }, span); + } + + /** + * 携带 StreamSpan 的取消句柄,供调用方在首包探测完成后调 detach() 弹出 NODE_STACK + */ + public static final class StreamChatHandle implements StreamCancellationHandle { + private final StreamCancellationHandle delegate; + private final StreamSpan span; + + public StreamChatHandle(StreamCancellationHandle delegate, StreamSpan span) { + this.delegate = delegate; + this.span = span; + } + + @Override + public void cancel() { + delegate.cancel(); + } + + @Override + public void detach() { span.detach(); } } diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java index aa1ae2299..3b6b6e470 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/RoutingLLMService.java @@ -138,7 +138,14 @@ public StreamCancellationHandle streamChat(ChatRequest request, StreamCallback c continue; } - ProbeStreamBridge.ProbeResult result = awaitFirstPacket(bridge, handle, callback); + ProbeStreamBridge.ProbeResult result; + try { + result = awaitFirstPacket(bridge, handle, callback); + } finally { + // 首包探测完成后(无论成功失败)弹出 LLM_PROVIDER 节点, + // 确保 TTFT 节点已正确归属到 provider 下 + handle.detach(); + } if (result.isSuccess()) { healthStore.markSuccess(target.id()); diff --git a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java index 809830775..3ba15ba42 100644 --- a/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java +++ b/infra-ai/src/main/java/com/nageoffer/ai/ragent/infra/chat/StreamCancellationHandle.java @@ -45,4 +45,12 @@ public interface StreamCancellationHandle { * - 调用后应该不会再继续产生 onContent() 回调 */ void cancel(); + + /** + * 将该 handle 关联的 trace 节点从当前线程的 NODE_STACK 弹出 + *

+ * 仅由内部实现覆写(如 StreamChatHandle),用于延迟 detach 跨线程 span, + * 使首包探测等同步子节点能正确归属到该 provider 节点下 + */ + default void detach() {} }