diff --git a/openhands-agent-server/openhands/agent_server/openai_service.py b/openhands-agent-server/openhands/agent_server/openai_service.py index 1e669287d3..20b231f520 100644 --- a/openhands-agent-server/openhands/agent_server/openai_service.py +++ b/openhands-agent-server/openhands/agent_server/openai_service.py @@ -18,6 +18,7 @@ OpenAIModel, OpenAIModelListResponse, OpenAIResponseMessage, + OpenAIUsage, ) from openhands.agent_server.persistence import PersistedSettings, get_settings_store from openhands.sdk import LLM, Message @@ -26,7 +27,10 @@ SendMessageRequest, StartConversationRequest, ) -from openhands.sdk.conversation.state import ConversationExecutionStatus +from openhands.sdk.conversation.state import ( + ConversationExecutionStatus, + ConversationState, +) from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.llm.message import ImageContent, TextContent from openhands.sdk.logger import get_logger @@ -271,6 +275,20 @@ async def _delete_conversation_safely( ) +def _openai_usage_from_state(state: ConversationState) -> OpenAIUsage: + token_usage = state.stats.get_combined_metrics().accumulated_token_usage + if token_usage is None: + return OpenAIUsage() + + prompt_tokens = token_usage.prompt_tokens + completion_tokens = token_usage.completion_tokens + return OpenAIUsage( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + + async def list_openai_models() -> OpenAIModelListResponse: try: profiles = LLMProfileStore().list_summaries() @@ -341,6 +359,7 @@ async def run_chat_completion( event_service, allow_existing_response=allow_existing_response ) _raise_for_terminal_error(status_value) + state = await event_service.get_state() final_response = await event_service.get_agent_final_response() response = OpenAIChatCompletionResponse( id=f"chatcmpl-{uuid4().hex}", @@ -352,6 +371,7 @@ async def run_chat_completion( message=OpenAIResponseMessage(content=final_response), ) ], + usage=_openai_usage_from_state(state), ) assert conversation_id is not None return OpenAIChatCompletionResult( diff --git a/tests/cross/test_remote_conversation_live_server.py b/tests/cross/test_remote_conversation_live_server.py index 94852f7a1c..565182da37 100644 --- a/tests/cross/test_remote_conversation_live_server.py +++ b/tests/cross/test_remote_conversation_live_server.py @@ -191,7 +191,6 @@ def fake_completion( ): # type: ignore[no-untyped-def] from openhands.sdk.llm.llm_response import LLMResponse from openhands.sdk.llm.message import Message - from openhands.sdk.llm.utils.metrics import MetricsSnapshot # Create a minimal ModelResponse with a single assistant message litellm_msg = LiteLLMMessage.model_validate( @@ -210,17 +209,21 @@ def fake_completion( # Convert to OpenHands Message message = Message.from_llm_chat_message(litellm_msg) - # Create metrics snapshot - metrics_snapshot = MetricsSnapshot( - model_name="test-model", - accumulated_cost=0.0, - max_budget_per_task=None, - accumulated_token_usage=None, + self.metrics.add_token_usage( + prompt_tokens=7, + completion_tokens=5, + cache_read_tokens=0, + cache_write_tokens=0, + context_window=8192, + response_id="test-resp", + reasoning_tokens=0, ) # Return LLMResponse as expected by the agent return LLMResponse( - message=message, metrics=metrics_snapshot, raw_response=raw_response + message=message, + metrics=self.metrics.get_snapshot(), + raw_response=raw_response, ) monkeypatch.setattr(LLM, "completion", fake_completion, raising=True) @@ -639,6 +642,11 @@ def test_openai_chat_completions_gateway_over_real_server( "role": "assistant", "content": "Hello from patched LLM", } + assert body["usage"] == { + "prompt_tokens": 7, + "completion_tokens": 5, + "total_tokens": 12, + } conversation_id = response.headers["X-OpenHands-ServerConversation-ID"] UUID(conversation_id)