Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 58 additions & 13 deletions server/internal/integrations/octo/outbound.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,7 @@ func (p *Patcher) processEvent(ctx context.Context, e events.Event) error {
case protocol.EventChatDone:
return p.sendReply(ctx, inst, binding, taskID, chatDoneContent(e.Payload), token)
case protocol.EventTaskFailed:
msg := errorMessageFromPayload(e.Payload)
if msg == "" {
msg = "The agent run failed."
}
return p.sendReply(ctx, inst, binding, taskID, "⚠️ "+msg, token)
return p.sendReply(ctx, inst, binding, taskID, "⚠️ "+failureMessageFromPayload(e.Payload), token)
}
return nil
}
Expand Down Expand Up @@ -213,14 +209,63 @@ func chatDoneContent(payload any) string {
return ""
}

func errorMessageFromPayload(payload any) string {
if m, ok := payload.(map[string]any); ok {
if s, ok := m["error"].(string); ok && s != "" {
return s
}
if s, ok := m["error_message"].(string); ok && s != "" {
return s
// failureMessageFromPayload builds the user-facing text for a task:failed
// event. The result is never empty. Precedence:
//  1. The explicit "error" string, then its "error_message" alias — the
//     most actionable detail when the publisher attached one.
//  2. The friendly copy registered in failureReasonText for the coarse
//     "failure_reason" classifier.
//  3. defaultFailureMessage when the payload is not a map[string]any, when
//     none of those keys holds a non-empty string, or when failure_reason
//     has no registered copy.
//
// The IM user should never be left with an empty or bare failure notice when
// the backend actually knows what went wrong.
func failureMessageFromPayload(payload any) string {
	m, ok := payload.(map[string]any)
	if !ok {
		return defaultFailureMessage
	}
	// Explicit detail wins; the keys are checked in precedence order.
	for _, key := range [...]string{"error", "error_message"} {
		if s, ok := m[key].(string); ok && s != "" {
			return s
		}
	}
	if reason, ok := m["failure_reason"].(string); ok && reason != "" {
		if desc, ok := failureReasonText[reason]; ok {
			return desc
		}
		// Unknown reason (a classifier value added server-side later):
		// fall through to the generic message rather than leaking a raw enum.
	}
	// Single terminal fallback. It must stay reachable so that a payload with
	// no usable detail still produces a non-empty message.
	return defaultFailureMessage
}

// defaultFailureMessage is the generic user-facing failure text. It is also
// the copy registered for the "agent_error.unknown" reason below.
const defaultFailureMessage = "Agent 运行失败,请稍后重试或联系工作区管理员。"

// failureReasonText translates a failure-reason string into friendly Chinese
// copy for the end user. The keys mirror the taskfailure.Reason values and
// must be kept in sync with server/pkg/taskfailure/failure.go. A reason with
// no entry here is simply absent from the map, so lookups should use the
// comma-ok form and substitute defaultFailureMessage on a miss.
var failureReasonText = map[string]string{
	// Queueing, runtime availability and run-level limits.
	"queued_expired":      "任务排队超时,未被任何 runtime 领取。请确认 Agent 的 daemon 在线。",
	"runtime_offline":     "Agent 的 runtime 当前离线,消息已记录。请确认 daemon 在线后重试。",
	"runtime_recovery":    "Agent 的 runtime 正在恢复中,请稍后重试。",
	"timeout":             "Agent 运行超时,请稍后重试。",
	"iteration_limit":     "Agent 达到迭代上限,未能完成。请简化请求或重试。",
	"agent_blocked":       "Agent 被阻塞,无法继续。请联系工作区管理员。",
	"api_invalid_request": "请求无效,Agent 无法处理。请调整后重试。",

	// agent_error.provider_*: failures attributed to the model provider.
	"agent_error.provider_auth_or_access":         "模型服务认证失败,请检查 Agent runtime 的 API Key 配置。",
	"agent_error.provider_quota_limit":            "模型服务额度已用尽,请检查账户额度。",
	"agent_error.provider_capacity_or_rate_limit": "模型服务繁忙或触发限流,请稍后重试。",
	"agent_error.provider_server_error":           "模型服务返回错误,请稍后重试。",
	"agent_error.provider_network":                "连接模型服务失败,请检查网络后重试。",

	// Remaining agent_error.* reasons: process, output, configuration and
	// runtime problems, plus the catch-all.
	"agent_error.process_failure":                "Agent 进程异常退出,请联系工作区管理员。",
	"agent_error.empty_or_unparseable_output":    "Agent 未返回有效结果,请重试。",
	"agent_error.agent_timeout":                  "Agent 运行超时,请稍后重试。",
	"agent_error.context_overflow":               "对话上下文过长,Agent 无法处理。请精简内容后重试。",
	"agent_error.missing_config":                 "Agent runtime 缺少必要配置(如环境变量),请联系工作区管理员。",
	"agent_error.model_not_found_or_unavailable": "指定的模型不存在或不可用,请检查 Agent 的模型配置。",
	"agent_error.runtime_version_unsupported":    "Agent runtime 版本不受支持,请升级后重试。",
	"agent_error.runtime_missing_executable":     "Agent runtime 缺少所需的可执行文件,请检查安装。",
	"agent_error.unknown":                        defaultFailureMessage,
}
67 changes: 67 additions & 0 deletions server/internal/integrations/octo/outbound_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,73 @@ func TestProcessEvent_TaskFailed_SendsError(t *testing.T) {
}
}

// TestProcessEvent_TaskFailed_FallsBackToFailureReason feeds processEvent a
// task-failed event whose payload carries only failure_reason and checks that
// the fake sender recorded exactly one message: the warning prefix followed
// by the failureReasonText entry for that reason.
func TestProcessEvent_TaskFailed_FallsBackToFailureReason(t *testing.T) {
	const reason = "agent_error.provider_auth_or_access"

	queries := &fakePatcherQueries{binding: octoBinding(), inst: activeInst()}
	sender := &fakeSender{}
	patcher := newPatcher(queries, sender)

	// The payload has no explicit error string, only the coarse classifier,
	// so the expected text is the friendly Chinese copy rather than the
	// generic fallback.
	evt := events.Event{
		Type:          protocol.EventTaskFailed,
		TaskID:        "11111111-1111-1111-1111-111111111111",
		ChatSessionID: "22222222-2222-2222-2222-222222222222",
		Payload:       map[string]any{"failure_reason": reason},
	}
	if err := patcher.processEvent(context.Background(), evt); err != nil {
		t.Fatalf("processEvent: %v", err)
	}

	want := "⚠️ " + failureReasonText[reason]
	if !(sender.sent == 1 && sender.lastTxt == want) {
		t.Errorf("sent=%d lastTxt=%q, want %q", sender.sent, sender.lastTxt, want)
	}
}

// TestProcessEvent_TaskFailed_DefaultWhenNoDetail feeds processEvent a
// task-failed event with an empty payload map and checks that the fake sender
// recorded exactly one message: the warning prefix followed by
// defaultFailureMessage.
func TestProcessEvent_TaskFailed_DefaultWhenNoDetail(t *testing.T) {
	queries := &fakePatcherQueries{binding: octoBinding(), inst: activeInst()}
	sender := &fakeSender{}
	patcher := newPatcher(queries, sender)

	// With neither error nor failure_reason present, the user must still get
	// a non-empty message instead of a bare or English fallback.
	evt := events.Event{
		Type:          protocol.EventTaskFailed,
		TaskID:        "11111111-1111-1111-1111-111111111111",
		ChatSessionID: "22222222-2222-2222-2222-222222222222",
		Payload:       map[string]any{},
	}
	if err := patcher.processEvent(context.Background(), evt); err != nil {
		t.Fatalf("processEvent: %v", err)
	}

	want := "⚠️ " + defaultFailureMessage
	if !(sender.sent == 1 && sender.lastTxt == want) {
		t.Errorf("sent=%d lastTxt=%q, want default", sender.sent, sender.lastTxt)
	}
}

// TestFailureMessageFromPayload is a table-driven check of the precedence
// failureMessageFromPayload applies: explicit error text, then the
// error_message alias, then the failure_reason copy, and finally
// defaultFailureMessage for unknown reasons, empty maps, and non-map or nil
// payloads.
func TestFailureMessageFromPayload(t *testing.T) {
	type testCase struct {
		name    string
		payload any
		want    string
	}

	tests := []testCase{
		{
			name:    "explicit error wins",
			payload: map[string]any{"error": "boom", "failure_reason": "timeout"},
			want:    "boom",
		},
		{
			name:    "error_message alias",
			payload: map[string]any{"error_message": "kaboom"},
			want:    "kaboom",
		},
		{
			name:    "known reason",
			payload: map[string]any{"failure_reason": "runtime_offline"},
			want:    failureReasonText["runtime_offline"],
		},
		{
			name:    "unknown reason downgrades",
			payload: map[string]any{"failure_reason": "some_future_reason"},
			want:    defaultFailureMessage,
		},
		{
			name:    "empty map",
			payload: map[string]any{},
			want:    defaultFailureMessage,
		},
		{
			name:    "non-map payload",
			payload: "not a map",
			want:    defaultFailureMessage,
		},
		{
			name:    "nil payload",
			payload: nil,
			want:    defaultFailureMessage,
		},
		{
			name:    "empty error falls through to reason",
			payload: map[string]any{"error": "", "failure_reason": "timeout"},
			want:    failureReasonText["timeout"],
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := failureMessageFromPayload(tt.payload)
			if got == tt.want {
				return
			}
			t.Errorf("failureMessageFromPayload(%v) = %q, want %q", tt.payload, got, tt.want)
		})
	}
}

func TestProcessEvent_WebOnlySession_Skips(t *testing.T) {
q := &fakePatcherQueries{bindingErr: pgx.ErrNoRows}
s := &fakeSender{}
Expand Down
36 changes: 29 additions & 7 deletions server/internal/service/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1766,17 +1766,28 @@ func (s *TaskService) HandleFailedTasks(ctx context.Context, tasks []db.AgentTas
}

if workspaceID != "" {
payload := map[string]any{
"task_id": util.UUIDToString(t.ID),
"agent_id": util.UUIDToString(t.AgentID),
"issue_id": util.UUIDToString(t.IssueID),
"status": "failed",
"failure_reason": failureReason,
}
// Chat tasks carry no issue_id; the outbound relays route on the
// chat session, so it must be present or the IM reply is dropped.
if t.ChatSessionID.Valid {
payload["chat_session_id"] = util.UUIDToString(t.ChatSessionID)
}
// Redacted human-readable detail so the IM user sees the actual
// cause, not just "agent run failed".
if t.Error.Valid && t.Error.String != "" {
payload["error"] = redact.Text(t.Error.String)
}
s.Bus.Publish(events.Event{
Type: protocol.EventTaskFailed,
WorkspaceID: workspaceID,
ActorType: "system",
Payload: map[string]any{
"task_id": util.UUIDToString(t.ID),
"agent_id": util.UUIDToString(t.AgentID),
"issue_id": util.UUIDToString(t.IssueID),
"status": "failed",
"failure_reason": failureReason,
},
Payload: payload,
})
}

Expand Down Expand Up @@ -2007,6 +2018,17 @@ func (s *TaskService) broadcastTaskEvent(ctx context.Context, eventType string,
if task.ChatSessionID.Valid {
payload["chat_session_id"] = util.UUIDToString(task.ChatSessionID)
}
// Surface the failure detail to event consumers (e.g. the Octo/Lark
// outbound relays) so an IM user sees what went wrong instead of a generic
// "agent run failed". error is the redacted human-readable message; the
// coarse failure_reason is the machine classifier. Both are only set on a
// failed task row, so a completed/cancelled task simply omits them.
if task.FailureReason.Valid && task.FailureReason.String != "" {
payload["failure_reason"] = task.FailureReason.String
}
if task.Error.Valid && task.Error.String != "" {
payload["error"] = redact.Text(task.Error.String)
}
s.Bus.Publish(events.Event{
Type: eventType,
WorkspaceID: workspaceID,
Expand Down
Loading