Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
bfc2646
feat(agent-server): add OpenAI chat completions gateway
enyst Jun 6, 2026
bc5909d
Apply suggestion from @enyst
enyst Jun 6, 2026
0c5e334
docs(agent-server): explain OpenAI gateway auth headers
enyst Jun 6, 2026
2941fc9
refactor(agent-server): split OpenAI gateway service and models
enyst Jun 6, 2026
f86a15f
test(agent-server): document OpenAI gateway live runner
enyst Jun 6, 2026
b7cb647
test(agent-server): use natural live gateway prompts
enyst Jun 6, 2026
ea64fcf
fix(agent-server): report OpenAI gateway token usage (#3546)
smolpaws Jun 6, 2026
1bbb541
test(agent-server): replay frozen OpenAI gateway completions
openhands-agent Jun 6, 2026
deb783f
fix(agent-server): keep OpenAI gateway conversations
openhands-agent Jun 6, 2026
3b2a883
docs(examples): add OpenAI gateway client demo
openhands-agent Jun 6, 2026
24b4ea4
docs(examples): clarify OpenAI gateway demo
enyst Jun 6, 2026
a83d79f
Merge branch 'main' into openhands/openai-chat-completions-gateway
enyst Jun 9, 2026
0bfad38
Merge branch 'main' into openhands/openai-chat-completions-gateway
enyst Jun 9, 2026
321b418
Reuse OpenAI gateway response models
enyst Jun 9, 2026
facefd9
Merge branch 'main' into openhands/openai-chat-completions-gateway
enyst Jun 9, 2026
2269f71
Fix OpenAI gateway workspace configuration
enyst Jun 9, 2026
2e7ecca
Clarify OpenAI gateway follow-up choices
enyst Jun 9, 2026
f60e15d
chore: Remove PR-only artifacts [automated]
Jun 9, 2026
88a7d45
Merge branch 'main' into openhands/openai-chat-completions-gateway
enyst Jun 10, 2026
e54ed38
Address OpenAI gateway review nits
enyst Jun 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions examples/02_remote_agent_server/15_openai_compatible_gateway.py
Comment thread
enyst marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Use the agent-server through an OpenAI-compatible Chat Completions client.

This example starts a local agent-server, stores an LLM profile, lists it through
``GET /v1/models``, then calls ``POST /v1/chat/completions`` with the OpenAI
Python SDK. The returned ``X-OpenHands-ServerConversation-ID`` header is passed
back on a second call to continue the same OpenHands conversation.
"""

import os
from uuid import UUID

import httpx
from openai import OpenAI
from scripts.utils import ManagedAPIServer


# The gateway runs a full OpenHands agent, but OpenAI clients still need a
# normal model-like name. We create an LLM profile below and expose it as
# `openhands_<profile_name>` through `/v1/models`.

api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
# Truthiness (not `is not None`) so an empty-string variable is rejected too.
assert api_key, "Set LLM_API_KEY or OPENAI_API_KEY."

llm_model = os.getenv("LLM_MODEL", "gpt-5-nano")
llm_base_url = os.getenv("LLM_BASE_URL")
profile_name = "gateway_demo"
gateway_model = f"openhands_{profile_name}"

# Start a local agent-server for the demo. `use_session_api_key=True` turns on
# authentication; the same key works as both `X-Session-API-Key` for native
# agent-server routes and `Authorization: Bearer ...` for OpenAI SDK calls.

with ManagedAPIServer(
    port=8770,
    use_session_api_key=True,
    extra_env={
        "OH_ENABLE_VNC": "0",
        "OH_ENABLE_VSCODE": "0",
        "OH_PRELOAD_TOOLS": "0",
        "OH_SECRET_KEY": "example-secret-key-for-demo-only-32b",
        "OH_WEBHOOKS": "[]",
    },
    health_request_timeout=2.0,
) as server:
    session_api_key = (
        os.getenv("SESSION_API_KEY")
        or os.getenv("OH_SESSION_API_KEYS_0")
        or server.session_api_key
    )
    assert session_api_key is not None

    openai_client = OpenAI(
        api_key=session_api_key,
        base_url=f"{server.base_url}/v1",
        timeout=120.0,
    )

    llm_config = {"model": llm_model, "api_key": api_key}
    if llm_base_url:
        llm_config["base_url"] = llm_base_url

    # Use the native REST API once to create the profile that backs the gateway
    # model. After that, normal OpenAI SDK calls are enough for chat traffic.
    # The `with` block closes the HTTP client even when a step below fails.
    with httpx.Client(
        base_url=server.base_url,
        headers={"X-Session-API-Key": session_api_key},
        timeout=120.0,
    ) as api_client:
        # Set once the gateway has returned a well-formed conversation ID, so
        # the cleanup in `finally` knows whether there is anything to delete.
        conversation_url = None
        try:
            # `gateway_demo` becomes visible to OpenAI clients as
            # `openhands_gateway_demo`.
            profile_response = api_client.post(
                f"/api/profiles/{profile_name}",
                json={"llm": llm_config, "include_secrets": True},
            )
            assert profile_response.status_code == 201, profile_response.text

            models = openai_client.models.list()
            model_ids = [model.id for model in models.data]
            assert gateway_model in model_ids
            print(f"Gateway models include: {gateway_model}")

            # Ask through the OpenAI SDK. `with_raw_response` lets us read the
            # custom response header that identifies the OpenHands conversation
            # created behind this otherwise OpenAI-shaped request.
            first_response = openai_client.chat.completions.with_raw_response.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "system",
                        "content": "Answer directly and do not use tools.",
                    },
                    {
                        "role": "user",
                        "content": (
                            "In one sentence, explain what an OpenAI-compatible "
                            "agent-server gateway does."
                        ),
                    },
                ],
            )
            first_completion = first_response.parse()
            conversation_id = first_response.headers.get(
                "X-OpenHands-ServerConversation-ID"
            )
            assert conversation_id is not None
            # Raises ValueError if the header value is not a well-formed UUID.
            UUID(conversation_id)
            conversation_url = f"/api/conversations/{conversation_id}"

            first_answer = first_completion.choices[0].message.content
            print(f"First answer: {first_answer}")
            print(f"OpenHands conversation ID: {conversation_id}")

            persisted_response = api_client.get(conversation_url)
            assert persisted_response.status_code == 200, persisted_response.text

            # The gateway keeps conversations by default. Passing the header
            # back lets another OpenAI-compatible request continue the same
            # server-side agent conversation instead of starting over.
            second_completion = openai_client.chat.completions.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "user",
                        "content": (
                            "Now answer in five words or fewer: "
                            "what did I ask about?"
                        ),
                    }
                ],
                extra_headers={"X-OpenHands-ServerConversation-ID": conversation_id},
            )
            second_answer = second_completion.choices[0].message.content
            print(f"Second answer using same conversation: {second_answer}")

            conversation_response = api_client.get(conversation_url)
            assert (
                conversation_response.status_code == 200
            ), conversation_response.text
            # Either level may be missing or null, so fall back to empty dicts.
            stats = conversation_response.json().get("stats") or {}
            usage_to_metrics = stats.get("usage_to_metrics") or {}
            accumulated_cost = sum(
                metrics.get("accumulated_cost", 0.0)
                for metrics in usage_to_metrics.values()
            )
        finally:
            # Clean up the demo resources even when a step above failed. Real
            # applications can keep the conversation ID and inspect it later
            # through the native agent-server API.
            if conversation_url is not None:
                api_client.delete(conversation_url)
            api_client.delete(f"/api/profiles/{profile_name}")

print(f"EXAMPLE_COST: {accumulated_cost}")
9 changes: 9 additions & 0 deletions openhands-agent-server/openhands/agent_server/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
from openhands.agent_server.llm_router import llm_router
from openhands.agent_server.mcp_router import mcp_router
from openhands.agent_server.middleware import CORSDispatcher
from openhands.agent_server.openai.router import (
create_openai_api_key_dependency,
openai_router,
)
from openhands.agent_server.profiles_router import profiles_router
from openhands.agent_server.server_details_router import (
get_server_info,
Expand Down Expand Up @@ -319,6 +323,11 @@ def _add_api_routes(app: FastAPI, config: Config) -> None:
api_router.include_router(auth_router)
app.include_router(api_router)

openai_dependencies = []
if config.session_api_keys:
openai_dependencies.append(Depends(create_openai_api_key_dependency(config)))
app.include_router(openai_router, dependencies=openai_dependencies)

# Workspace static-file routes get their own auth group that accepts
# EITHER the X-Session-API-Key header OR the workspace session cookie.
# The cookie is required so that <iframe src> / <img src> embeds of
Expand Down
6 changes: 6 additions & 0 deletions openhands-agent-server/openhands/agent_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ class Config(BaseModel):
"The location of the directory where conversations and events are stored."
),
)
workspace_path: Path = Field(
default=Path("workspace/project"),
description=(
"Default workspace directory for conversations created by the server."
),
)
bash_events_dir: Path = Field(
default=Path("workspace/bash_events"),
description=(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# OpenAI-compatible gateway

This package contains the agent-server implementation for the OpenAI-compatible API surface under `/v1`.

- `router.py` defines the FastAPI routes and maps OpenAI-style bearer authentication to the existing session key mechanism.
- `models.py` contains the small server-side request models and aliases the reusable OpenAI response models.
- `service.py` translates OpenAI chat completion requests into OpenHands conversations, waits for completion, and returns OpenAI-shaped responses.

The gateway intentionally stays separate from the native agent-server routers so the OpenAI compatibility layer can evolve without mixing protocol translation code into the core REST API modules.
Empty file.
48 changes: 48 additions & 0 deletions openhands-agent-server/openhands/agent_server/openai/models.py
Comment thread
enyst marked this conversation as resolved.
Comment thread
enyst marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Models for the OpenAI-compatible agent-server gateway."""

from typing import Literal

from openai.types import CompletionUsage, Model
from openai.types.chat import ChatCompletion
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_message import ChatCompletionMessage
from pydantic import BaseModel, ConfigDict


# Response-side types are reused from the `openai` SDK rather than redefined
# here; these aliases only give them gateway-local names.
OpenAIChatCompletionChoice = Choice
OpenAIChatCompletionResponse = ChatCompletion
OpenAIModel = Model
OpenAIResponseMessage = ChatCompletionMessage
OpenAIUsage = CompletionUsage


class OpenAIImageURL(BaseModel):
    """Object form of a content part's ``image_url`` value."""

    # Location of the image, kept as the plain string the client sent.
    url: str


class OpenAIContentPart(BaseModel):
    """One element of a list-valued message ``content`` field.

    Only ``type`` is required; ``text`` and ``image_url`` are optional and
    default to ``None``.
    """

    # Fields this model does not declare are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    # Discriminator supplied by the client; accepted here as a free-form string.
    type: str
    text: str | None = None
    # Accepted either as an object with a ``url`` field or as a bare string.
    image_url: OpenAIImageURL | str | None = None


class OpenAIChatMessage(BaseModel):
    """A single entry of the ``messages`` array in a chat completion request."""

    # Fields this model does not declare are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    # Validation rejects any role outside these four values.
    role: Literal["system", "user", "assistant", "tool"]
    # Plain text, a list of typed content parts, or absent.
    content: str | list[OpenAIContentPart] | None = None


class OpenAIChatCompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``.

    Only ``model``, ``messages`` and ``stream`` are read from the payload.
    """

    # Other request parameters a client sends are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    model: str
    messages: list[OpenAIChatMessage]
    stream: bool = False


class OpenAIModelListResponse(BaseModel):
    """Response body returned by ``GET /v1/models``."""

    # Fixed to the literal "list"; no other value validates.
    object: Literal["list"] = "list"
    # Model entries, using the `openai` SDK's own ``Model`` type.
    data: list[OpenAIModel]
95 changes: 95 additions & 0 deletions openhands-agent-server/openhands/agent_server/openai/router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""OpenAI-compatible gateway routes for the agent server."""

from typing import Annotated
from uuid import UUID

from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response, status
from fastapi.security import APIKeyHeader, HTTPAuthorizationCredentials, HTTPBearer

from openhands.agent_server.config import Config
from openhands.agent_server.conversation_service import ConversationService
from openhands.agent_server.dependencies import get_conversation_service
from openhands.agent_server.openai.models import (
OpenAIChatCompletionRequest,
OpenAIChatCompletionResponse,
OpenAIModelListResponse,
)
from openhands.agent_server.openai.service import (
list_openai_models,
run_chat_completion,
)


# Router holding the OpenAI-compatible endpoints; the routes registered on it
# below spell out their full `/v1/...` paths.
openai_router = APIRouter(tags=["OpenAI Compatibility"])

# Both schemes are created with `auto_error=False` so that a missing header
# reaches the dependency as `None` and the dependency itself decides whether
# to reject the request.
_SESSION_API_KEY_HEADER = APIKeyHeader(name="X-Session-API-Key", auto_error=False)
_AUTHORIZATION_HEADER = HTTPBearer(auto_error=False)


def create_openai_api_key_dependency(config: Config):
    """Accept the same session key through OpenHands and OpenAI auth shapes.

    ``X-Session-API-Key`` preserves compatibility with existing agent-server
    clients, while ``Authorization: Bearer`` lets OpenAI-compatible clients use
    their standard API-key header. Both forms validate against
    ``config.session_api_keys``; this does not introduce a second credential
    system. When no session keys are configured, the local server remains
    unauthenticated like the existing agent-server API.

    Presented keys are compared with ``secrets.compare_digest`` so the check
    does not reveal, through timing, how much of a guessed key was correct.

    Args:
        config: Server configuration; ``config.session_api_keys`` is read on
            every request rather than captured once.

    Returns:
        A dependency callable. It returns ``None`` when the request is allowed
        and raises ``HTTPException`` with status 401 and a
        ``WWW-Authenticate: Bearer`` challenge otherwise.
    """
    import secrets

    def _is_configured_key(candidate: str | None) -> bool:
        """Return True when *candidate* equals one of the configured keys."""
        if candidate is None:
            return False
        # Compare bytes: `compare_digest` rejects `str` values that contain
        # non-ASCII characters, and header values are client-controlled.
        candidate_bytes = candidate.encode("utf-8")
        matched = False
        # Deliberately no early exit, so every configured key is always checked.
        for configured_key in config.session_api_keys:
            if secrets.compare_digest(candidate_bytes, configured_key.encode("utf-8")):
                matched = True
        return matched

    def check_openai_api_key(
        session_api_key: str | None = Depends(_SESSION_API_KEY_HEADER),
        authorization: HTTPAuthorizationCredentials | None = Depends(
            _AUTHORIZATION_HEADER
        ),
    ) -> None:
        if not config.session_api_keys:
            return
        bearer_token = authorization.credentials if authorization else None
        if _is_configured_key(session_api_key):
            return
        if _is_configured_key(bearer_token):
            return
        # A 401 response is expected to carry a challenge naming the scheme.
        raise HTTPException(
            status.HTTP_401_UNAUTHORIZED,
            headers={"WWW-Authenticate": "Bearer"},
        )

    return check_openai_api_key


def _get_config(request: Request) -> Config:
    """Return the ``Config`` stored on the application state.

    Raises:
        HTTPException: 503 when ``request.app.state`` has no ``config``
            attribute or the attribute is not a ``Config`` instance.
    """
    candidate = getattr(request.app.state, "config", None)
    if isinstance(candidate, Config):
        return candidate
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Agent server config is not available",
    )


@openai_router.get("/v1/models", response_model=OpenAIModelListResponse)
async def get_openai_models(request: Request) -> OpenAIModelListResponse:
    """Return the model list produced by ``list_openai_models``."""
    # Called only for its check: it raises 503 when the server config is
    # missing. The returned config itself is not needed here.
    _get_config(request)
    model_list = await list_openai_models()
    return model_list


@openai_router.post(
    "/v1/chat/completions",
    response_model=OpenAIChatCompletionResponse,
    response_model_exclude_none=True,
)
async def create_chat_completion(
    body: OpenAIChatCompletionRequest,
    request: Request,
    response: Response,
    x_openhands_server_conversation_id: Annotated[
        UUID | None, Header(alias="X-OpenHands-ServerConversation-ID")
    ] = None,
    conversation_service: ConversationService = Depends(get_conversation_service),
) -> OpenAIChatCompletionResponse:
    """Run a chat completion request and return the OpenAI-shaped result.

    The optional ``X-OpenHands-ServerConversation-ID`` request header is
    forwarded to ``run_chat_completion`` as ``reusable_conversation_id``. The
    conversation ID reported by the result is always written to the response
    header of the same name.

    Raises:
        HTTPException: 503 when the server config is not available.
    """
    server_config = _get_config(request)
    outcome = await run_chat_completion(
        request=body,
        config=server_config,
        conversation_service=conversation_service,
        reusable_conversation_id=x_openhands_server_conversation_id,
    )
    conversation_header = str(outcome.conversation_id)
    response.headers["X-OpenHands-ServerConversation-ID"] = conversation_header
    return outcome.response
Loading
Loading