diff --git a/src/eva/assistant/agentic/audit_log.py b/src/eva/assistant/agentic/audit_log.py index b9536da2..7fc28438 100644 --- a/src/eva/assistant/agentic/audit_log.py +++ b/src/eva/assistant/agentic/audit_log.py @@ -202,7 +202,7 @@ def append_assistant_output( wall-clock captured at the first ``audio_delta`` so the audit log reflects when the assistant actually started responding. """ - if content and not tool_calls: + if content: entry = { "value": content, "displayName": "Bot", @@ -213,12 +213,10 @@ def append_assistant_output( } self.transcript.append(entry) - # With tool calls, we save an empty content regardless because it is never returned to the client. - # TODO Implement returning the content to the client while tool calls are in progress self.conversation_messages.append( ConversationMessage( role=MessageRole.ASSISTANT, - content="" if tool_calls else content, + content=content, tool_calls=tool_calls, ) ) diff --git a/src/eva/assistant/agentic/system.py b/src/eva/assistant/agentic/system.py index 34dbe906..f9df217c 100644 --- a/src/eva/assistant/agentic/system.py +++ b/src/eva/assistant/agentic/system.py @@ -186,6 +186,9 @@ async def _run_tool_loop( ] response_content = getattr(response, "content", "") or (response if isinstance(response, str) else "") + if response_content: + response_content = response_content.strip() + response_tool_calls_for_stats = ( [ {"name": tool["function"]["name"], "arguments": tool["function"]["arguments"]} @@ -275,77 +278,76 @@ async def _run_tool_loop( yield GENERIC_ERROR return - if tool_calls_dicts: - messages.append( - { - "role": "assistant", - "content": response_content, - "tool_calls": tool_calls_dicts, - } - ) + if response_content: + logger.info(f"💬 Assistant LLM response: {response_content}") + yield response_content - self.audit_log.append_assistant_output(content=response_content, tool_calls=tool_calls_dicts) - - # Execute each tool call - for tool_call in response_tool_calls: - tool_name = _clean_tool_name(tool_call.function.name) - try: - # TODO Consider this a model error instead of handling this gracefully - params = json.loads(tool_call.function.arguments) - except json.JSONDecodeError: - params = {} - - # Log tool call - logger.info(f"🔧 Tool call: {tool_name}") - logger.info(f" Parameters: {json.dumps(params, indent=2)}") - - # Special handling for transfer to live agent - if tool_name == "transfer_to_agent": - transfer_message = "Transferring you to a live agent. Please wait." - self.audit_log.append_tool_call( - tool_name=tool_name, - parameters=params, - response={"status": "transfer_initiated"}, - ) - - logger.info(f"🔀 Transfer initiated: {transfer_message}") - yield transfer_message - self.audit_log.append_assistant_output(transfer_message) - return - - result = await self.tool_handler.execute(tool_name, params) - - if result.get("status") == "error": - logger.warning(f"❌ Tool error: {tool_name} - {result.get('message', 'Unknown error')}") - else: - logger.info(f"✅ Tool response: {tool_name}") - logger.info(f" Result: {json.dumps(result, indent=2)}") + self.audit_log.append_assistant_output(content=response_content, tool_calls=tool_calls_dicts or None) + if not tool_calls_dicts: + # No tool calls, this is the final response + return + + messages.append( + { + "role": "assistant", + "content": response_content, + "tool_calls": tool_calls_dicts, + } + ) + + # Execute each tool call + for tool_call in response_tool_calls: + tool_name = _clean_tool_name(tool_call.function.name) + try: + # TODO Consider this a model error instead of handling this gracefully + params = json.loads(tool_call.function.arguments) + except json.JSONDecodeError: + params = {} + + # Log tool call + logger.info(f"🔧 Tool call: {tool_name}") + logger.info(f" Parameters: {json.dumps(params, indent=2)}") + + # Special handling for transfer to live agent + if tool_name == "transfer_to_agent": + transfer_message = "Transferring you to a live agent. Please wait." self.audit_log.append_tool_call( tool_name=tool_name, parameters=params, - response=result, + response={"status": "transfer_initiated"}, ) - # Add tool response to messages - tool_content = json.dumps(result) - messages.append( - { - "role": "tool", - "tool_call_id": tool_call.id, - "content": tool_content, - } - ) + logger.info(f"🔀 Transfer initiated: {transfer_message}") + yield transfer_message + self.audit_log.append_assistant_output(transfer_message) + return - self.audit_log.append_tool_message(tool_call_id=tool_call.id, content=tool_content) - else: - # No tool calls, this is the final response - if response_content: - response_content = response_content.strip() - logger.info(f"💬 Assistant LLM response: {response_content}") - yield response_content - self.audit_log.append_assistant_output(response_content) - return + result = await self.tool_handler.execute(tool_name, params) + + if result.get("status") == "error": + logger.warning(f"❌ Tool error: {tool_name} - {result.get('message', 'Unknown error')}") + else: + logger.info(f"✅ Tool response: {tool_name}") + logger.info(f" Result: {json.dumps(result, indent=2)}") + + self.audit_log.append_tool_call( + tool_name=tool_name, + parameters=params, + response=result, + ) + + # Add tool response to messages + tool_content = json.dumps(result) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "content": tool_content, + } + ) + + self.audit_log.append_tool_message(tool_call_id=tool_call.id, content=tool_content) def get_stats(self) -> dict[str, Any]: """Get conversation statistics.""" diff --git a/tests/unit/assistant/test_agentic_system.py b/tests/unit/assistant/test_agentic_system.py index 1004e1d6..5a1a602e 100644 --- a/tests/unit/assistant/test_agentic_system.py +++ b/tests/unit/assistant/test_agentic_system.py @@ -146,7 +146,7 @@ async def test_single_tool_call_then_response(self): async for msg in system.process_query("Check reservation ABC123"): responses.append(msg) - assert responses == ["Your reservation ABC123 is confirmed."] + assert responses == ["What if there is text here", "Your reservation ABC123 is confirmed."] # Verify tool was executed with correct params tool_handler.execute.assert_awaited_once_with("get_reservation", {"confirmation_number": "ABC123"}) @@ -154,21 +154,30 @@ async def test_single_tool_call_then_response(self): # Verify LLM was called twice (tool call + final response) assert llm_client.complete.await_count == 2 - # Verify transcript + # Verify transcript — content alongside tool calls now appears as an assistant entry transcript = audit_log.transcript message_types = [e["message_type"] for e in transcript] - assert message_types == ["user", "llm_call", "tool_call", "tool_response", "llm_call", "assistant"] + assert message_types == [ + "user", + "llm_call", + "assistant", + "tool_call", + "tool_response", + "llm_call", + "assistant", + ] assert transcript[0]["value"] == "Check reservation ABC123" - assert transcript[2]["value"]["tool"] == "get_reservation" - assert transcript[3]["value"]["response"]["status"] == "success" - assert transcript[5]["value"] == "Your reservation ABC123 is confirmed." + assert transcript[2]["value"] == "What if there is text here" + assert transcript[3]["value"]["tool"] == "get_reservation" + assert transcript[4]["value"]["response"]["status"] == "success" + assert transcript[6]["value"] == "Your reservation ABC123 is confirmed." - # Verify conversation messages + # Verify conversation messages — content is preserved even with tool calls assert _conv_to_dicts(audit_log.get_conversation_messages()) == [ {"role": "user", "content": "Check reservation ABC123"}, { "role": "assistant", - "content": "", + "content": "What if there is text here", "tool_calls": [ { "id": "call_1",