Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion openhands-agent-server/openhands/agent_server/openai_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
OpenAIModel,
OpenAIModelListResponse,
OpenAIResponseMessage,
OpenAIUsage,
)
from openhands.agent_server.persistence import PersistedSettings, get_settings_store
from openhands.sdk import LLM, Message
Expand All @@ -26,7 +27,10 @@
SendMessageRequest,
StartConversationRequest,
)
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.conversation.state import (
ConversationExecutionStatus,
ConversationState,
)
from openhands.sdk.llm.llm_profile_store import LLMProfileStore
from openhands.sdk.llm.message import ImageContent, TextContent
from openhands.sdk.logger import get_logger
Expand Down Expand Up @@ -271,6 +275,20 @@ async def _delete_conversation_safely(
)


def _openai_usage_from_state(state: ConversationState) -> OpenAIUsage:
    """Build the OpenAI-style usage payload from a conversation's token totals.

    The accumulated token usage is read from the combined metrics exposed by
    ``state.stats``. If no usage has been recorded (the value is ``None``),
    an ``OpenAIUsage`` constructed with no arguments is returned. Otherwise
    the prompt and completion counts are copied over and ``total_tokens`` is
    set to their sum.
    """
    combined_metrics = state.stats.get_combined_metrics()
    usage = combined_metrics.accumulated_token_usage
    if usage is not None:
        n_prompt, n_completion = usage.prompt_tokens, usage.completion_tokens
        return OpenAIUsage(
            prompt_tokens=n_prompt,
            completion_tokens=n_completion,
            # The total is derived here rather than read from the metrics.
            total_tokens=n_prompt + n_completion,
        )

    # Nothing recorded yet: fall back to the model's own defaults.
    return OpenAIUsage()


async def list_openai_models() -> OpenAIModelListResponse:
try:
profiles = LLMProfileStore().list_summaries()
Expand Down Expand Up @@ -341,6 +359,7 @@ async def run_chat_completion(
event_service, allow_existing_response=allow_existing_response
)
_raise_for_terminal_error(status_value)
state = await event_service.get_state()
final_response = await event_service.get_agent_final_response()
response = OpenAIChatCompletionResponse(
id=f"chatcmpl-{uuid4().hex}",
Expand All @@ -352,6 +371,7 @@ async def run_chat_completion(
message=OpenAIResponseMessage(content=final_response),
)
],
usage=_openai_usage_from_state(state),
)
assert conversation_id is not None
return OpenAIChatCompletionResult(
Expand Down
24 changes: 16 additions & 8 deletions tests/cross/test_remote_conversation_live_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ def fake_completion(
): # type: ignore[no-untyped-def]
from openhands.sdk.llm.llm_response import LLMResponse
from openhands.sdk.llm.message import Message
from openhands.sdk.llm.utils.metrics import MetricsSnapshot

# Create a minimal ModelResponse with a single assistant message
litellm_msg = LiteLLMMessage.model_validate(
Expand All @@ -210,17 +209,21 @@ def fake_completion(
# Convert to OpenHands Message
message = Message.from_llm_chat_message(litellm_msg)

# Create metrics snapshot
metrics_snapshot = MetricsSnapshot(
model_name="test-model",
accumulated_cost=0.0,
max_budget_per_task=None,
accumulated_token_usage=None,
self.metrics.add_token_usage(
prompt_tokens=7,
completion_tokens=5,
cache_read_tokens=0,
cache_write_tokens=0,
context_window=8192,
response_id="test-resp",
reasoning_tokens=0,
)

# Return LLMResponse as expected by the agent
return LLMResponse(
message=message, metrics=metrics_snapshot, raw_response=raw_response
message=message,
metrics=self.metrics.get_snapshot(),
raw_response=raw_response,
)

monkeypatch.setattr(LLM, "completion", fake_completion, raising=True)
Expand Down Expand Up @@ -639,6 +642,11 @@ def test_openai_chat_completions_gateway_over_real_server(
"role": "assistant",
"content": "Hello from patched LLM",
}
assert body["usage"] == {
"prompt_tokens": 7,
"completion_tokens": 5,
"total_tokens": 12,
}
conversation_id = response.headers["X-OpenHands-ServerConversation-ID"]
UUID(conversation_id)

Expand Down
Loading