Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/eva/assistant/agentic/audit_log.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -202,7 +202,7 @@ def append_assistant_output(
wall-clock captured at the first ``audio_delta`` so the audit
log reflects when the assistant actually started responding.
"""
if content and not tool_calls:
if content:
entry = {
"value": content,
"displayName": "Bot",
Expand All @@ -213,12 +213,10 @@ def append_assistant_output(
}
self.transcript.append(entry)

# With tool calls, we save an empty content regardless because it is never returned to the client.
# TODO Implement returning the content to the client while tool calls are in progress
self.conversation_messages.append(
ConversationMessage(
role=MessageRole.ASSISTANT,
content="" if tool_calls else content,
content=content,
tool_calls=tool_calls,
)
)
Expand Down
128 changes: 65 additions & 63 deletions src/eva/assistant/agentic/system.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -186,6 +186,9 @@ async def _run_tool_loop(
]

response_content = getattr(response, "content", "") or (response if isinstance(response, str) else "")
if response_content:
response_content = response_content.strip()

response_tool_calls_for_stats = (
[
{"name": tool["function"]["name"], "arguments": tool["function"]["arguments"]}
Expand Down Expand Up @@ -275,77 +278,76 @@ async def _run_tool_loop(
yield GENERIC_ERROR
return

if tool_calls_dicts:
messages.append(
{
"role": "assistant",
"content": response_content,
"tool_calls": tool_calls_dicts,
}
)
if response_content:
logger.info(f"💬 Assistant LLM response: {response_content}")
yield response_content

self.audit_log.append_assistant_output(content=response_content, tool_calls=tool_calls_dicts)

# Execute each tool call
for tool_call in response_tool_calls:
tool_name = _clean_tool_name(tool_call.function.name)
try:
# TODO Consider this a model error instead of handling this gracefully
params = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
params = {}

# Log tool call
logger.info(f"🔧 Tool call: {tool_name}")
logger.info(f" Parameters: {json.dumps(params, indent=2)}")

# Special handling for transfer to live agent
if tool_name == "transfer_to_agent":
transfer_message = "Transferring you to a live agent. Please wait."
self.audit_log.append_tool_call(
tool_name=tool_name,
parameters=params,
response={"status": "transfer_initiated"},
)

logger.info(f"🔀 Transfer initiated: {transfer_message}")
yield transfer_message
self.audit_log.append_assistant_output(transfer_message)
return

result = await self.tool_handler.execute(tool_name, params)

if result.get("status") == "error":
logger.warning(f"❌ Tool error: {tool_name} - {result.get('message', 'Unknown error')}")
else:
logger.info(f"✅ Tool response: {tool_name}")
logger.info(f" Result: {json.dumps(result, indent=2)}")
self.audit_log.append_assistant_output(content=response_content, tool_calls=tool_calls_dicts or None)

if not tool_calls_dicts:
# No tool calls, this is the final response
return

messages.append(
{
"role": "assistant",
"content": response_content,
"tool_calls": tool_calls_dicts,
}
)

# Execute each tool call
for tool_call in response_tool_calls:
tool_name = _clean_tool_name(tool_call.function.name)
try:
# TODO Consider this a model error instead of handling this gracefully
params = json.loads(tool_call.function.arguments)
except json.JSONDecodeError:
params = {}

# Log tool call
logger.info(f"🔧 Tool call: {tool_name}")
logger.info(f" Parameters: {json.dumps(params, indent=2)}")

# Special handling for transfer to live agent
if tool_name == "transfer_to_agent":
transfer_message = "Transferring you to a live agent. Please wait."
self.audit_log.append_tool_call(
tool_name=tool_name,
parameters=params,
response=result,
response={"status": "transfer_initiated"},
)

# Add tool response to messages
tool_content = json.dumps(result)
messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": tool_content,
}
)
logger.info(f"🔀 Transfer initiated: {transfer_message}")
yield transfer_message
self.audit_log.append_assistant_output(transfer_message)
return

self.audit_log.append_tool_message(tool_call_id=tool_call.id, content=tool_content)
else:
# No tool calls, this is the final response
if response_content:
response_content = response_content.strip()
logger.info(f"💬 Assistant LLM response: {response_content}")
yield response_content
self.audit_log.append_assistant_output(response_content)
return
result = await self.tool_handler.execute(tool_name, params)

if result.get("status") == "error":
logger.warning(f"❌ Tool error: {tool_name} - {result.get('message', 'Unknown error')}")
else:
logger.info(f"✅ Tool response: {tool_name}")
logger.info(f" Result: {json.dumps(result, indent=2)}")

self.audit_log.append_tool_call(
tool_name=tool_name,
parameters=params,
response=result,
)

# Add tool response to messages
tool_content = json.dumps(result)
messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": tool_content,
}
)

self.audit_log.append_tool_message(tool_call_id=tool_call.id, content=tool_content)

def get_stats(self) -> dict[str, Any]:
"""Get conversation statistics."""
Expand Down
25 changes: 17 additions & 8 deletions tests/unit/assistant/test_agentic_system.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -146,29 +146,38 @@ async def test_single_tool_call_then_response(self):
async for msg in system.process_query("Check reservation ABC123"):
responses.append(msg)

assert responses == ["Your reservation ABC123 is confirmed."]
assert responses == ["What if there is text here", "Your reservation ABC123 is confirmed."]

# Verify tool was executed with correct params
tool_handler.execute.assert_awaited_once_with("get_reservation", {"confirmation_number": "ABC123"})

# Verify LLM was called twice (tool call + final response)
assert llm_client.complete.await_count == 2

# Verify transcript
# Verify transcript — content alongside tool calls now appears as an assistant entry
transcript = audit_log.transcript
message_types = [e["message_type"] for e in transcript]
assert message_types == ["user", "llm_call", "tool_call", "tool_response", "llm_call", "assistant"]
assert message_types == [
"user",
"llm_call",
"assistant",
"tool_call",
"tool_response",
"llm_call",
"assistant",
]
assert transcript[0]["value"] == "Check reservation ABC123"
assert transcript[2]["value"]["tool"] == "get_reservation"
assert transcript[3]["value"]["response"]["status"] == "success"
assert transcript[5]["value"] == "Your reservation ABC123 is confirmed."
assert transcript[2]["value"] == "What if there is text here"
assert transcript[3]["value"]["tool"] == "get_reservation"
assert transcript[4]["value"]["response"]["status"] == "success"
assert transcript[6]["value"] == "Your reservation ABC123 is confirmed."

# Verify conversation messages
# Verify conversation messages — content is preserved even with tool calls
assert _conv_to_dicts(audit_log.get_conversation_messages()) == [
{"role": "user", "content": "Check reservation ABC123"},
{
"role": "assistant",
"content": "",
"content": "What if there is text here",
"tool_calls": [
{
"id": "call_1",
Expand Down
Loading