diff --git a/examples/02_remote_agent_server/15_openai_compatible_gateway.py b/examples/02_remote_agent_server/15_openai_compatible_gateway.py
new file mode 100644
index 0000000000..437d9a898b
--- /dev/null
+++ b/examples/02_remote_agent_server/15_openai_compatible_gateway.py
@@ -0,0 +1,144 @@
+"""Use the agent-server through an OpenAI-compatible Chat Completions client.
+
+This example starts a local agent-server, stores an LLM profile, lists it through
+``GET /v1/models``, then calls ``POST /v1/chat/completions`` with the OpenAI
+Python SDK. The returned ``X-OpenHands-ServerConversation-ID`` header is passed
+back on a second call to continue the same OpenHands conversation.
+"""
+
+import os
+from uuid import UUID
+
+import httpx
+from openai import OpenAI
+from scripts.utils import ManagedAPIServer
+
+
+# The gateway runs a full OpenHands agent, but OpenAI clients still need a
+# normal model-like name. We create an LLM profile below and expose it as
+# `openhands_<profile_name>` through `/v1/models`.
+
+api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
+# The chain can yield "" (empty OPENAI_API_KEY), so check truthiness, not None.
+assert api_key, "Set LLM_API_KEY or OPENAI_API_KEY."
+llm_model = os.getenv("LLM_MODEL", "gpt-5-nano")
+llm_base_url = os.getenv("LLM_BASE_URL")
+profile_name = "gateway_demo"
+gateway_model = f"openhands_{profile_name}"
+
+# Start a local agent-server for the demo. `use_session_api_key=True` turns on
+# authentication; the same key works as both `X-Session-API-Key` for native
+# agent-server routes and `Authorization: Bearer ...` for OpenAI SDK calls.
+
+with ManagedAPIServer(
+    port=8770,
+    use_session_api_key=True,
+    extra_env={
+        "OH_ENABLE_VNC": "0",
+        "OH_ENABLE_VSCODE": "0",
+        "OH_PRELOAD_TOOLS": "0",
+        "OH_SECRET_KEY": "example-secret-key-for-demo-only-32b",
+        "OH_WEBHOOKS": "[]",
+    },
+    health_request_timeout=2.0,
+) as server:
+    session_api_key = (
+        os.getenv("SESSION_API_KEY")
+        or os.getenv("OH_SESSION_API_KEYS_0")
+        or server.session_api_key
+    )
+    assert session_api_key is not None
+
+    # Use the native REST API once to create the profile that backs the gateway
+    # model. After that, normal OpenAI SDK calls are enough for chat traffic.
+    api_client = httpx.Client(
+        base_url=server.base_url,
+        headers={"X-Session-API-Key": session_api_key},
+        timeout=120.0,
+    )
+    openai_client = OpenAI(
+        api_key=session_api_key,
+        base_url=f"{server.base_url}/v1",
+        timeout=120.0,
+    )
+    llm_config = {"model": llm_model, "api_key": api_key}
+    if llm_base_url:
+        llm_config["base_url"] = llm_base_url
+    # `gateway_demo` becomes visible to OpenAI clients as `openhands_gateway_demo`.
+    profile_response = api_client.post(
+        f"/api/profiles/{profile_name}",
+        json={"llm": llm_config, "include_secrets": True},
+    )
+    assert profile_response.status_code == 201, profile_response.text
+
+    # The stored profile now holds the real LLM API key, so everything below runs
+    # under try/finally: a failed assertion or request must not skip the cleanup.
+    conversation_id: str | None = None
+    try:
+        models = openai_client.models.list()
+        model_ids = [model.id for model in models.data]
+        assert gateway_model in model_ids
+        print(f"Gateway models include: {gateway_model}")
+
+        # Ask through the OpenAI SDK. `with_raw_response` exposes the custom response
+        # header identifying the OpenHands conversation behind this OpenAI-shaped call.
+        first_response = openai_client.chat.completions.with_raw_response.create(
+            model=gateway_model,
+            messages=[
+                {
+                    "role": "system",
+                    "content": "Answer directly and do not use tools.",
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        "In one sentence, explain what an OpenAI-compatible "
+                        "agent-server gateway does."
+                    ),
+                },
+            ],
+        )
+        first_completion = first_response.parse()
+        conversation_id = first_response.headers.get(
+            "X-OpenHands-ServerConversation-ID"
+        )
+        assert conversation_id is not None
+        UUID(conversation_id)
+        print(f"First answer: {first_completion.choices[0].message.content}")
+        print(f"OpenHands conversation ID: {conversation_id}")
+        persisted_response = api_client.get(f"/api/conversations/{conversation_id}")
+        assert persisted_response.status_code == 200, persisted_response.text
+
+        # The gateway keeps conversations by default. Passing the header back lets
+        # another OpenAI-compatible request continue the same server-side conversation.
+        second_completion = openai_client.chat.completions.create(
+            model=gateway_model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": (
+                        "Now answer in five words or fewer: what did I ask about?"
+                    ),
+                }
+            ],
+            extra_headers={"X-OpenHands-ServerConversation-ID": conversation_id},
+        )
+        second_answer = second_completion.choices[0].message.content
+        print(f"Second answer using same conversation: {second_answer}")
+
+        conversation_response = api_client.get(f"/api/conversations/{conversation_id}")
+        assert conversation_response.status_code == 200, conversation_response.text
+        stats = conversation_response.json().get("stats") or {}
+        usage_to_metrics = stats.get("usage_to_metrics") or {}
+        accumulated_cost = sum(
+            m.get("accumulated_cost", 0.0) for m in usage_to_metrics.values()
+        )
+    finally:
+        # Clean up the demo resources. Real applications can keep the
+        # conversation ID and inspect it later through the native API.
+        if conversation_id is not None:
+            api_client.delete(f"/api/conversations/{conversation_id}")
+        api_client.delete(f"/api/profiles/{profile_name}")
+        api_client.close()
+
+    print(f"EXAMPLE_COST: {accumulated_cost}")
diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py
index 7abf61f592..2d0bfe135e 100644
--- a/openhands-agent-server/openhands/agent_server/api.py
+++ b/openhands-agent-server/openhands/agent_server/api.py
@@ -41,6 +41,10 @@
 from openhands.agent_server.llm_router import llm_router
 from openhands.agent_server.mcp_router import mcp_router
 from openhands.agent_server.middleware import CORSDispatcher
+from openhands.agent_server.openai.router import (
+    create_openai_api_key_dependency,
+    openai_router,
+)
 from openhands.agent_server.profiles_router import profiles_router
 from openhands.agent_server.server_details_router import (
     get_server_info,
@@ -319,6 +323,11 @@ def _add_api_routes(app: FastAPI, config: Config) -> None:
     api_router.include_router(auth_router)
     app.include_router(api_router)
 
+    openai_dependencies = []
+    if config.session_api_keys:
+        openai_dependencies.append(Depends(create_openai_api_key_dependency(config)))
+    app.include_router(openai_router, dependencies=openai_dependencies)
+
     # Workspace static-file routes get their own auth group that accepts
     # EITHER the X-Session-API-Key header OR the workspace session cookie.
     # The cookie is required so that <iframe src> / <img src> embeds of
diff --git a/openhands-agent-server/openhands/agent_server/config.py b/openhands-agent-server/openhands/agent_server/config.py
index b76a18f460..2b734d181e 100644
--- a/openhands-agent-server/openhands/agent_server/config.py
+++ b/openhands-agent-server/openhands/agent_server/config.py
@@ -132,6 +132,12 @@ class Config(BaseModel):
             "The location of the directory where conversations and events are stored."
         ),
     )
+    workspace_path: Path = Field(
+        default=Path("workspace/project"),
+        description=(
+            "Default workspace directory for conversations created by the server."
+        ),
+    )
     bash_events_dir: Path = Field(
         default=Path("workspace/bash_events"),
         description=(
diff --git a/openhands-agent-server/openhands/agent_server/openai/README.md b/openhands-agent-server/openhands/agent_server/openai/README.md
new file mode 100644
index 0000000000..d1a82843ae
--- /dev/null
+++ b/openhands-agent-server/openhands/agent_server/openai/README.md
@@ -0,0 +1,9 @@
+# OpenAI-compatible gateway
+
+This package contains the agent-server implementation for the OpenAI-compatible API surface under `/v1`.
+
+- `router.py` defines the FastAPI routes and maps OpenAI-style bearer authentication to the existing session key mechanism.
+- `models.py` contains the small server-side request models and aliases the reusable OpenAI response models.
+- `service.py` translates OpenAI chat completion requests into OpenHands conversations, waits for completion, and returns OpenAI-shaped responses.
+
+The gateway intentionally stays separate from the native agent-server routers so the OpenAI compatibility layer can evolve without mixing protocol translation code into the core REST API modules.
diff --git a/openhands-agent-server/openhands/agent_server/openai/__init__.py b/openhands-agent-server/openhands/agent_server/openai/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/openhands-agent-server/openhands/agent_server/openai/models.py b/openhands-agent-server/openhands/agent_server/openai/models.py
new file mode 100644
index 0000000000..0fc587b5f6
--- /dev/null
+++ b/openhands-agent-server/openhands/agent_server/openai/models.py
@@ -0,0 +1,48 @@
+"""Models for the OpenAI-compatible agent-server gateway."""
+
+from typing import Literal
+
+from openai.types import CompletionUsage, Model
+from openai.types.chat import ChatCompletion
+from openai.types.chat.chat_completion import Choice
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
+from pydantic import BaseModel, ConfigDict
+
+
+OpenAIChatCompletionChoice = Choice  # response types are reused from the openai SDK
+OpenAIChatCompletionResponse = ChatCompletion
+OpenAIModel = Model
+OpenAIResponseMessage = ChatCompletionMessage
+OpenAIUsage = CompletionUsage
+
+
+class OpenAIImageURL(BaseModel):  # object form of an ``image_url`` content part
+    url: str  # forwarded unchanged to the SDK ``ImageContent``
+
+
+class OpenAIContentPart(BaseModel):  # one element of a list-valued message content
+    type: str  # the gateway handles "text" and "image_url"; others get HTTP 400
+    text: str | None = None  # payload of a "text" part
+    image_url: OpenAIImageURL | str | None = None  # object form or bare URL string
+    # Unknown fields sent by clients are dropped instead of failing validation.
+    model_config = ConfigDict(extra="ignore")
+
+
+class OpenAIChatMessage(BaseModel):  # one entry of the request ``messages`` list
+    role: Literal["system", "user", "assistant", "tool"]
+    content: str | list[OpenAIContentPart] | None = None  # plain text or parts
+    # Unknown fields sent by clients are dropped instead of failing validation.
+    model_config = ConfigDict(extra="ignore")
+
+
+class OpenAIChatCompletionRequest(BaseModel):  # body of POST /v1/chat/completions
+    model: str  # gateway model id, i.e. ``openhands_<profile_name>``
+    messages: list[OpenAIChatMessage]
+    stream: bool = False  # accepted by the schema; the service rejects True with 400
+    # Other OpenAI request parameters are dropped (extra="ignore"), not rejected.
+    model_config = ConfigDict(extra="ignore")
+
+
+class OpenAIModelListResponse(BaseModel):  # response body of GET /v1/models
+    object: Literal["list"] = "list"
+    data: list[OpenAIModel]  # one entry per listed model
diff --git a/openhands-agent-server/openhands/agent_server/openai/router.py b/openhands-agent-server/openhands/agent_server/openai/router.py
new file mode 100644
index 0000000000..b3e3118e7e
--- /dev/null
+++ b/openhands-agent-server/openhands/agent_server/openai/router.py
@@ -0,0 +1,95 @@
+"""OpenAI-compatible gateway routes for the agent server."""
+
+from typing import Annotated
+from uuid import UUID
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Request, Response, status
+from fastapi.security import APIKeyHeader, HTTPAuthorizationCredentials, HTTPBearer
+
+from openhands.agent_server.config import Config
+from openhands.agent_server.conversation_service import ConversationService
+from openhands.agent_server.dependencies import get_conversation_service
+from openhands.agent_server.openai.models import (
+    OpenAIChatCompletionRequest,
+    OpenAIChatCompletionResponse,
+    OpenAIModelListResponse,
+)
+from openhands.agent_server.openai.service import (
+    list_openai_models,
+    run_chat_completion,
+)
+
+
+openai_router = APIRouter(tags=["OpenAI Compatibility"])
+# auto_error=False: a missing header yields None so either scheme may satisfy auth.
+_SESSION_API_KEY_HEADER = APIKeyHeader(name="X-Session-API-Key", auto_error=False)
+_AUTHORIZATION_HEADER = HTTPBearer(auto_error=False)
+
+
+def create_openai_api_key_dependency(config: Config):
+    """Build the auth dependency for the OpenAI-compatible ``/v1`` routes.
+
+    A request passes when ``X-Session-API-Key`` or an ``Authorization: Bearer``
+    token equals an entry of ``config.session_api_keys`` (compared in constant
+    time), or when no keys are configured, like the existing agent-server API.
+    Anything else raises ``HTTPException`` 401 with ``WWW-Authenticate``.
+    """
+    from hmac import compare_digest  # stdlib; local so the block is self-contained
+
+    def check_openai_api_key(
+        session_api_key: str | None = Depends(_SESSION_API_KEY_HEADER),
+        authorization: HTTPAuthorizationCredentials | None = Depends(
+            _AUTHORIZATION_HEADER
+        ),
+    ) -> None:
+        keys = config.session_api_keys
+        bearer_token = authorization.credentials if authorization else None
+        offered = [t.encode() for t in (session_api_key, bearer_token) if t is not None]
+        if keys and not any(
+            compare_digest(t, k.encode()) for t in offered for k in keys
+        ):
+            raise HTTPException(
+                status.HTTP_401_UNAUTHORIZED, headers={"WWW-Authenticate": "Bearer"}
+            )
+
+    return check_openai_api_key
+
+
+def _get_config(request: Request) -> Config:
+    """Return the ``Config`` stored on ``app.state``, or fail with HTTP 503."""
+    candidate = getattr(request.app.state, "config", None)
+    if isinstance(candidate, Config):
+        return candidate
+    raise HTTPException(
+        status.HTTP_503_SERVICE_UNAVAILABLE, "Agent server config is not available"
+    )
+
+
+@openai_router.get("/v1/models", response_model=OpenAIModelListResponse)
+async def get_openai_models(request: Request) -> OpenAIModelListResponse:
+    _get_config(request)  # result unused: only fails with 503 if config is missing
+    return await list_openai_models()
+
+
+@openai_router.post(
+    "/v1/chat/completions",
+    response_model=OpenAIChatCompletionResponse,
+    response_model_exclude_none=True,  # null-valued fields are left out of the body
+)
+async def create_chat_completion(
+    body: OpenAIChatCompletionRequest,
+    request: Request,
+    response: Response,
+    x_openhands_server_conversation_id: Annotated[
+        UUID | None, Header(alias="X-OpenHands-ServerConversation-ID")
+    ] = None,  # optional request header: ID of a conversation to continue
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> OpenAIChatCompletionResponse:
+    result = await run_chat_completion(
+        request=body,
+        config=_get_config(request),  # raises 503 when app.state has no Config
+        conversation_service=conversation_service,
+        reusable_conversation_id=x_openhands_server_conversation_id,
+    )
+    response.headers["X-OpenHands-ServerConversation-ID"] = str(result.conversation_id)
+    return result.response  # the conversation ID travels in the header set above
diff --git a/openhands-agent-server/openhands/agent_server/openai/service.py b/openhands-agent-server/openhands/agent_server/openai/service.py
new file mode 100644
index 0000000000..9e314764c6
--- /dev/null
+++ b/openhands-agent-server/openhands/agent_server/openai/service.py
@@ -0,0 +1,392 @@
+"""Service logic for the OpenAI-compatible agent-server gateway."""
+
+import asyncio
+import time
+from dataclasses import dataclass
+from uuid import UUID, uuid4
+
+from fastapi import HTTPException, status
+
+from openhands.agent_server.config import Config
+from openhands.agent_server.conversation_service import ConversationService
+from openhands.agent_server.event_service import EventService
+from openhands.agent_server.openai.models import (
+    OpenAIChatCompletionChoice,
+    OpenAIChatCompletionRequest,
+    OpenAIChatCompletionResponse,
+    OpenAIChatMessage,
+    OpenAIModel,
+    OpenAIModelListResponse,
+    OpenAIResponseMessage,
+    OpenAIUsage,
+)
+from openhands.agent_server.persistence import PersistedSettings, get_settings_store
+from openhands.sdk import LLM, Message
+from openhands.sdk.context.agent_context import AgentContext
+from openhands.sdk.conversation.request import (
+    SendMessageRequest,
+    StartConversationRequest,
+)
+from openhands.sdk.conversation.state import (
+    ConversationExecutionStatus,
+    ConversationState,
+)
+from openhands.sdk.llm.llm_profile_store import LLMProfileStore
+from openhands.sdk.llm.message import ImageContent, TextContent
+from openhands.sdk.settings import ACPAgentSettings, OpenHandsAgentSettings
+from openhands.sdk.workspace import LocalWorkspace
+
+
+_MODEL_PREFIX = "openhands_"  # gateway model id = this prefix + LLM profile name
+# Fixed gateway defaults are sufficient for the initial local-first endpoint;
+# promote them to Config only if clients need deployment-specific tuning.
+_GATEWAY_TIMEOUT_SECONDS = 120.0  # default wait budget for one agent run
+_POLL_INTERVAL_SECONDS = 2  # seconds slept between execution-status polls
+
+
+@dataclass(frozen=True)
+class OpenAIChatCompletionResult:  # what run_chat_completion hands to the router
+    response: OpenAIChatCompletionResponse  # OpenAI-shaped response body
+    conversation_id: UUID  # echoed in the X-OpenHands-ServerConversation-ID header
+
+
+def _profile_name_from_model(model: str) -> str:
+    """Strip ``_MODEL_PREFIX`` from *model*; 404 unless a profile name remains."""
+    profile_name = model.removeprefix(_MODEL_PREFIX)
+    if profile_name and profile_name != model:
+        return profile_name
+    detail = f"Unknown OpenHands model '{model}'. Use GET /v1/models."
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)
+
+
+def _load_profile_llm(profile_name: str, config: Config) -> LLM:
+    """Load a stored LLM profile, translating store errors to HTTP errors.
+
+    Missing profile -> 404, busy store -> 503, ``ValueError`` -> 400.
+    """
+    try:
+        return LLMProfileStore().load(profile_name, cipher=config.cipher)
+    except FileNotFoundError as exc:
+        detail = f"Profile '{profile_name}' not found"
+        raise HTTPException(status.HTTP_404_NOT_FOUND, detail) from exc
+    except TimeoutError as exc:
+        detail = "Profile store is busy. Please retry."
+        raise HTTPException(status.HTTP_503_SERVICE_UNAVAILABLE, detail) from exc
+    except ValueError as exc:
+        raise HTTPException(status.HTTP_400_BAD_REQUEST, str(exc)) from exc
+
+
+def _append_system_suffix(existing: str | None, system_text: str) -> str:
+    """Join the stripped, non-empty pieces with one blank line between them."""
+    pieces = [(existing or "").strip(), system_text.strip()]
+    return "\n\n".join(filter(None, pieces))
+
+
+def _with_profile_llm_and_system_text(
+    agent_settings: OpenHandsAgentSettings | ACPAgentSettings,
+    llm: LLM,
+    system_text: str,
+) -> OpenHandsAgentSettings | ACPAgentSettings:
+    """Return a copy of *agent_settings* that uses *llm* and *system_text*.
+
+    The LLM is always replaced. Non-empty *system_text* is additionally
+    appended to the agent context's ``system_message_suffix``; with empty
+    *system_text* the agent context is left as it is.
+    """
+    with_llm = agent_settings.model_copy(update={"llm": llm})
+    if not system_text:
+        return with_llm
+
+    agent_context = with_llm.agent_context
+    if not isinstance(with_llm, OpenHandsAgentSettings):
+        # Non-OpenHands settings may carry no agent context: fall back to an
+        # empty one. OpenHandsAgentSettings contexts are used as they are.
+        agent_context = agent_context or AgentContext()
+
+    merged_suffix = _append_system_suffix(
+        agent_context.system_message_suffix, system_text
+    )
+    new_context = agent_context.model_copy(
+        update={"system_message_suffix": merged_suffix}
+    )
+    return with_llm.model_copy(update={"agent_context": new_context})
+
+
+def _content_to_sdk_parts(
+    message: OpenAIChatMessage,
+) -> list[TextContent | ImageContent]:
+    """Translate OpenAI message content into SDK content parts.
+
+    ``None`` content yields ``[]``, a string yields one ``TextContent``, and a
+    list is converted part by part, skipping ``text`` parts with empty text.
+    Raises ``HTTPException`` 400 for an ``image_url`` part without a URL and
+    for any part type other than ``text`` or ``image_url``.
+    """
+    raw = message.content
+    if raw is None:
+        return []
+    if isinstance(raw, str):
+        return [TextContent(text=raw)]
+
+    converted: list[TextContent | ImageContent] = []
+    for item in raw:
+        if item.type == "text":
+            if item.text:
+                converted.append(TextContent(text=item.text))
+        elif item.type != "image_url":
+            detail = f"Unsupported content part type: {item.type}"
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, detail)
+        elif item.image_url is None:
+            detail = "image_url content part is missing a url"
+            raise HTTPException(status.HTTP_400_BAD_REQUEST, detail)
+        else:
+            source = item.image_url
+            url = source if isinstance(source, str) else source.url
+            converted.append(ImageContent(image_urls=[url]))
+    return converted
+
+
+def _message_text(message: OpenAIChatMessage) -> str:
+    """Return the message's text parts joined by newlines, skipping images."""
+    sdk_parts = _content_to_sdk_parts(message)
+    return "\n".join(
+        chunk.text for chunk in sdk_parts if isinstance(chunk, TextContent)
+    )
+
+
+def _latest_user_message(messages: list[OpenAIChatMessage]) -> OpenAIChatMessage:
+    """Return the last ``user`` message; HTTP 400 when there is none."""
+    # reversed(): the most recent user turn wins.
+    newest = next((m for m in reversed(messages) if m.role == "user"), None)
+    if newest is None:
+        detail = "At least one user message is required"
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
+    return newest
+
+
+def _system_text(messages: list[OpenAIChatMessage]) -> str:
+    """Concatenate the text of all ``system`` messages.
+
+    Messages are taken in request order and joined by a blank line; system
+    messages without text are skipped, so no system text yields ``""``.
+    """
+    system_messages = [m for m in messages if m.role == "system"]
+    rendered = map(_message_text, system_messages)
+    return "\n\n".join(filter(None, rendered))
+
+
+def _conversation_request(
+    *,
+    request: OpenAIChatCompletionRequest,
+    config: Config,
+    conversation_id: UUID | None,
+) -> StartConversationRequest:
+    """Build the ``StartConversationRequest`` for a gateway conversation.
+
+    LLM: the profile named by ``request.model``. System messages extend the
+    system-message suffix; the latest user message is sent with ``run=True``.
+    """
+    llm = _load_profile_llm(_profile_name_from_model(request.model), config)
+    persisted = get_settings_store(config).load() or PersistedSettings()
+    agent_settings = _with_profile_llm_and_system_text(
+        persisted.agent_settings, llm, _system_text(request.messages)
+    )
+    latest = _latest_user_message(request.messages)
+    conv_settings = persisted.conversation_settings.model_copy(
+        update={"agent_settings": agent_settings}
+    )
+    return conv_settings.create_request(
+        StartConversationRequest,
+        workspace=LocalWorkspace(working_dir=config.workspace_path),
+        conversation_id=conversation_id,
+        initial_message=SendMessageRequest(
+            role="user", content=_content_to_sdk_parts(latest), run=True
+        ),
+        autotitle=False,
+    )
+
+
+# Keep this server-side waiter close to the gateway for readability. It follows
+# the existing status-polling pattern, while RemoteConversation owns the richer
+# client-side WebSocket fallback; we can consolidate if this grows in follow-up.
+async def _wait_for_completion(
+    event_service: EventService,
+    *,
+    allow_existing_response: bool,
+    min_event_count: int | None = None,
+    timeout_seconds: float = _GATEWAY_TIMEOUT_SECONDS,
+) -> ConversationExecutionStatus:
+    """Poll *event_service* until the agent run counts as finished.
+
+    Returns the execution status observed when a completion condition was met;
+    raises ``HTTPException`` 504 if none is met within ``timeout_seconds``.
+    """
+    deadline = time.monotonic() + timeout_seconds
+    observed_run = False
+    while True:
+        state = await event_service.get_state()
+        last_status = state.execution_status
+        has_new = min_event_count is None or len(state.events) > min_event_count
+        if last_status == ConversationExecutionStatus.RUNNING:
+            observed_run = True
+        elif last_status.is_terminal() and (
+            allow_existing_response or observed_run or has_new
+        ):
+            return last_status
+        elif observed_run and has_new:
+            return last_status
+        elif (
+            allow_existing_response
+            and has_new
+            and await event_service.get_agent_final_response()
+        ):
+            return last_status
+
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            raise HTTPException(status.HTTP_504_GATEWAY_TIMEOUT, "Agent run timed out")
+        # Cap the sleep at the time left so the timeout is not overshot.
+        await asyncio.sleep(min(_POLL_INTERVAL_SECONDS, remaining))
+
+
+def _raise_for_terminal_error(status_value: ConversationExecutionStatus) -> None:
+    """Raise ``HTTPException`` for statuses that mean the run did not finish.
+
+    ERROR/STUCK -> 500, PAUSED/WAITING_FOR_CONFIRMATION -> 409; others pass.
+    """
+    failure_codes = {
+        ConversationExecutionStatus.ERROR: status.HTTP_500_INTERNAL_SERVER_ERROR,
+        ConversationExecutionStatus.STUCK: status.HTTP_500_INTERNAL_SERVER_ERROR,
+        ConversationExecutionStatus.PAUSED: status.HTTP_409_CONFLICT,
+        ConversationExecutionStatus.WAITING_FOR_CONFIRMATION: status.HTTP_409_CONFLICT,
+    }
+    http_code = failure_codes.get(status_value)
+    if http_code is not None:
+        raise HTTPException(
+            status_code=http_code,
+            detail=f"Agent run ended with status: {status_value.value}",
+        )
+
+
+def _openai_usage_from_state(state: ConversationState) -> OpenAIUsage:
+    """Convert the conversation's combined token usage to OpenAI ``usage``.
+
+    With no recorded usage every counter is 0; total = prompt + completion.
+    """
+    recorded = state.stats.get_combined_metrics().accumulated_token_usage
+    if recorded is None:
+        prompt_count = completion_count = 0
+    else:
+        prompt_count = recorded.prompt_tokens
+        completion_count = recorded.completion_tokens
+    return OpenAIUsage(
+        prompt_tokens=prompt_count,
+        completion_tokens=completion_count,
+        total_tokens=prompt_count + completion_count,
+    )
+
+
+async def list_openai_models() -> OpenAIModelListResponse:
+    """List every stored LLM profile as an ``openhands_<name>`` model.
+
+    Entries are sorted by id; a busy profile store results in HTTP 503.
+    """
+    try:
+        summaries = LLMProfileStore().list_summaries()
+    except TimeoutError:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Profile store is busy. Please retry.",
+        )
+    names = [s["name"] for s in summaries if isinstance(s.get("name"), str)]
+    model_ids = sorted(f"{_MODEL_PREFIX}{name}" for name in names)
+    return OpenAIModelListResponse(
+        data=[
+            OpenAIModel(id=model_id, object="model", created=0, owned_by="openhands")
+            for model_id in model_ids
+        ]
+    )
+
+
+async def run_chat_completion(
+    *,
+    request: OpenAIChatCompletionRequest,
+    config: Config,
+    conversation_service: ConversationService,
+    reusable_conversation_id: UUID | None,
+) -> OpenAIChatCompletionResult:
+    """Serve one chat-completion request by running an OpenHands agent turn.
+
+    If ``reusable_conversation_id`` names a conversation known to
+    ``conversation_service``, the latest user message is sent to it. Otherwise
+    a new conversation is started from the request (with that ID when one was
+    given). The call then waits for the run to finish and wraps the agent's
+    final response and token usage in an OpenAI ``chat.completion`` object.
+
+    Raises ``HTTPException``: 400 for ``stream=True``, 500 when a started
+    conversation has no event service, plus whatever the helpers raise.
+    """
+    if request.stream:
+        # SSE streaming needs incremental agent-event forwarding; add it separately.
+        detail = "Streaming chat completions are not supported yet"
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail)
+
+    # Built before the lookup below, so the model, profile and message checks
+    # in _conversation_request also apply when a conversation is reused.
+    start_request = _conversation_request(
+        request=request, config=config, conversation_id=reusable_conversation_id
+    )
+
+    reused = None
+    if reusable_conversation_id is not None:
+        reused = await conversation_service.get_event_service(reusable_conversation_id)
+
+    if reused is not None:
+        # Continue the existing conversation with the newest user message.
+        event_service, conversation_id = reused, reusable_conversation_id
+        allow_existing_response = False
+        # Counted before sending, so the waiter can require genuinely new events.
+        min_event_count = len((await event_service.get_state()).events) + 1
+        newest = _latest_user_message(request.messages)
+        await event_service.send_message(
+            Message(role="user", content=_content_to_sdk_parts(newest)), run=True
+        )
+    else:
+        # Unknown or absent ID: start a fresh conversation from the request.
+        allow_existing_response, min_event_count = True, None
+        info, _ = await conversation_service.start_conversation(start_request)
+        conversation_id = info.id
+        event_service = await conversation_service.get_event_service(info.id)
+        if event_service is None:
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="Conversation did not start",
+            )
+
+    status_value = await _wait_for_completion(
+        event_service,
+        allow_existing_response=allow_existing_response,
+        min_event_count=min_event_count,
+    )
+    _raise_for_terminal_error(status_value)
+    state = await event_service.get_state()
+    final_response = await event_service.get_agent_final_response()
+
+    # EventService.get_agent_final_response() returns final text from the SDK's
+    # get_agent_final_response(), so the gateway emits assistant text only.
+    answer = OpenAIResponseMessage(role="assistant", content=final_response)
+    choice = OpenAIChatCompletionChoice(index=0, finish_reason="stop", message=answer)
+    response = OpenAIChatCompletionResponse(
+        id=f"chatcmpl-{uuid4().hex}",
+        object="chat.completion",
+        created=int(time.time()),
+        model=request.model,
+        choices=[choice],
+        usage=_openai_usage_from_state(state),
+    )
+
+    assert conversation_id is not None
+    return OpenAIChatCompletionResult(
+        response=response, conversation_id=conversation_id
+    )
diff --git a/openhands-agent-server/pyproject.toml b/openhands-agent-server/pyproject.toml
index a2489d014f..4c39f1046c 100644
--- a/openhands-agent-server/pyproject.toml
+++ b/openhands-agent-server/pyproject.toml
@@ -10,6 +10,7 @@ dependencies = [
   "docker>=7.1,<8",
   "fastapi>=0.104",
   "openhands-sdk",
+  "openai>=2.33.0,<3",
   "pydantic>=2",
   "sqlalchemy>=2",
   "uvicorn>=0.31.1",
diff --git a/tests/agent_server/test_api_authentication.py b/tests/agent_server/test_api_authentication.py
index d24e2f85e4..71a3996764 100644
--- a/tests/agent_server/test_api_authentication.py
+++ b/tests/agent_server/test_api_authentication.py
@@ -118,6 +118,29 @@ def test_api_server_details_no_auth_required(client_with_auth):
     assert response.status_code != 401
 
 
+def test_openai_routes_accept_bearer_session_key(client_with_auth, monkeypatch):
+    """``/v1/models`` rejects anonymous calls and accepts both key header forms."""
+
+    class EmptyProfileStore:
+        def list_summaries(self) -> list[dict[str, object]]:
+            return []
+
+    monkeypatch.setattr(
+        "openhands.agent_server.openai.service.LLMProfileStore", EmptyProfileStore
+    )
+
+    anonymous = client_with_auth.get("/v1/models")
+    assert anonymous.status_code == 401
+
+    accepted_headers = (
+        {"Authorization": "Bearer test-key-123"},
+        {"X-Session-API-Key": "test-key-123"},
+    )
+    for auth_headers in accepted_headers:
+        authorized = client_with_auth.get("/v1/models", headers=auth_headers)
+        assert authorized.status_code == 200
+
+
 def test_api_protected_endpoints_require_auth(client_with_auth):
     """Test that API endpoints under /api prefix require authentication."""
     protected_endpoints = [
diff --git a/tests/agent_server/test_env_parser.py b/tests/agent_server/test_env_parser.py
index d8518a7ee2..725d12336a 100644
--- a/tests/agent_server/test_env_parser.py
+++ b/tests/agent_server/test_env_parser.py
@@ -442,6 +442,7 @@ def test_config_class_parsing(clean_env):
     os.environ["OH_SESSION_API_KEYS_1"] = "key2"
     os.environ["OH_ALLOW_CORS_ORIGINS_0"] = "http://localhost:3000"
     os.environ["OH_CONVERSATIONS_PATH"] = "/custom/conversations"
+    os.environ["OH_WORKSPACE_PATH"] = "/custom/workspace"
     os.environ["OH_ENABLE_VSCODE"] = "false"
 
     config = from_env(Config, "OH")
@@ -449,6 +450,7 @@ def test_config_class_parsing(clean_env):
     assert config.session_api_keys == ["key1", "key2"]
     assert config.allow_cors_origins == ["http://localhost:3000"]
     assert config.conversations_path == Path("/custom/conversations")
+    assert config.workspace_path == Path("/custom/workspace")
     assert config.enable_vscode is False
 
 
diff --git a/tests/cross/test_remote_conversation_live_server.py b/tests/cross/test_remote_conversation_live_server.py
index 3d128abcf2..b61cdb131f 100644
--- a/tests/cross/test_remote_conversation_live_server.py
+++ b/tests/cross/test_remote_conversation_live_server.py
@@ -13,6 +13,7 @@
 from collections.abc import Generator
 from contextlib import contextmanager
 from pathlib import Path
+from unittest.mock import patch
 from uuid import UUID
 
 import httpx
@@ -190,7 +191,6 @@ def fake_completion(
     ):  # type: ignore[no-untyped-def]
         from openhands.sdk.llm.llm_response import LLMResponse
         from openhands.sdk.llm.message import Message
-        from openhands.sdk.llm.utils.metrics import MetricsSnapshot
 
         # Create a minimal ModelResponse with a single assistant message
         litellm_msg = LiteLLMMessage.model_validate(
@@ -209,17 +209,21 @@ def fake_completion(
         # Convert to OpenHands Message
         message = Message.from_llm_chat_message(litellm_msg)
 
-        # Create metrics snapshot
-        metrics_snapshot = MetricsSnapshot(
-            model_name="test-model",
-            accumulated_cost=0.0,
-            max_budget_per_task=None,
-            accumulated_token_usage=None,
+        self.metrics.add_token_usage(
+            prompt_tokens=7,
+            completion_tokens=5,
+            cache_read_tokens=0,
+            cache_write_tokens=0,
+            context_window=8192,
+            response_id="test-resp",
+            reasoning_tokens=0,
         )
 
         # Return LLMResponse as expected by the agent
         return LLMResponse(
-            message=message, metrics=metrics_snapshot, raw_response=raw_response
+            message=message,
+            metrics=self.metrics.get_snapshot(),
+            raw_response=raw_response,
         )
 
     monkeypatch.setattr(LLM, "completion", fake_completion, raising=True)
@@ -581,6 +585,191 @@ def test_remote_conversation_over_real_server(server_env, patched_llm):
         shutil.rmtree(cwd_conversations)
 
 
+def test_openai_chat_completions_gateway_over_real_server(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch, patched_llm
+):  # End-to-end check of /v1/models and /v1/chat/completions on a live server.
+    from openhands.agent_server import (
+        config as config_module,
+        conversation_service as service_module,
+    )
+    from openhands.sdk.llm.llm_profile_store import LLMProfileStore
+
+    monkeypatch.setattr(config_module, "_default_config", None)  # reset module state
+    monkeypatch.setattr(service_module, "_conversation_service", None)
+    monkeypatch.delenv("OH_WEBHOOKS_0_BASE_URL", raising=False)  # ok if unset
+
+    profiles_dir = tmp_path / "profiles"
+    store = LLMProfileStore(base_dir=profiles_dir)
+    store.save(
+        "smoke",
+        LLM(model="gpt-4o-mini", api_key=SecretStr("test")),
+        include_secrets=True,  # presumably keeps api_key in the file; verify
+    )
+
+    with patch(  # service's LLMProfileStore() now yields a store at profiles_dir
+        "openhands.agent_server.openai.service.LLMProfileStore",
+        lambda: LLMProfileStore(base_dir=profiles_dir),
+    ):
+        with live_server_env(tmp_path, monkeypatch) as env:
+            with httpx.Client() as client:
+                models_response = client.get(f"{env['host']}/v1/models", timeout=2.0)
+                assert models_response.status_code == 200
+                assert models_response.json()["data"] == [
+                    {
+                        "id": "openhands_smoke",  # "openhands_" + profile name
+                        "object": "model",
+                        "created": 0,
+                        "owned_by": "openhands",
+                    }
+                ]
+
+                response = client.post(
+                    f"{env['host']}/v1/chat/completions",
+                    json={
+                        "model": "openhands_smoke",
+                        "messages": [
+                            {"role": "system", "content": "Answer briefly."},
+                            {"role": "user", "content": "Say hello."},
+                        ],
+                    },
+                    timeout=10.0,
+                )
+                assert response.status_code == 200
+                body = response.json()
+                assert body["object"] == "chat.completion"
+                assert body["model"] == "openhands_smoke"
+                assert body["choices"][0]["message"] == {
+                    "role": "assistant",
+                    "content": "Hello from patched LLM",
+                }
+                assert body["usage"] == {
+                    "prompt_tokens": 7,
+                    "completion_tokens": 5,
+                    "total_tokens": 12,  # 7 prompt + 5 completion
+                }
+                conversation_id = response.headers["X-OpenHands-ServerConversation-ID"]
+                UUID(conversation_id)  # raises ValueError unless a valid UUID
+                persisted_response = client.get(
+                    f"{env['host']}/api/conversations/{conversation_id}", timeout=2.0
+                )
+                assert persisted_response.status_code == 200
+                assert persisted_response.json()["workspace"]["working_dir"] == str(
+                    env["workspace_path"]
+                )
+
+                reused_response = client.post(  # second turn, same conversation
+                    f"{env['host']}/v1/chat/completions",
+                    headers={"X-OpenHands-ServerConversation-ID": conversation_id},
+                    json={
+                        "model": "openhands_smoke",
+                        "messages": [
+                            {"role": "user", "content": "Say hello again."},
+                        ],
+                    },
+                    timeout=10.0,
+                )
+                assert reused_response.status_code == 200
+                assert (
+                    reused_response.headers["X-OpenHands-ServerConversation-ID"]
+                    == conversation_id
+                )
+                assert reused_response.json()["choices"][0]["message"] == {
+                    "role": "assistant",
+                    "content": "Hello from patched LLM",
+                }
+
+
+def test_openai_gateway_replays_frozen_llm_fixtures(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+):  # Replay frozen fixture replies through the gateway using the OpenAI SDK.
+    import asyncio
+
+    from openai import OpenAI
+
+    from openhands.agent_server import (
+        config as config_module,
+        conversation_service as service_module,
+    )
+    from openhands.agent_server.models import StartConversationRequest
+    from openhands.sdk import Message, TextContent
+    from openhands.sdk.llm.llm_profile_store import LLMProfileStore
+    from openhands.sdk.testing import TestLLM
+    from openhands.sdk.workspace import LocalWorkspace
+
+    monkeypatch.setattr(config_module, "_default_config", None)  # reset module state
+    monkeypatch.setattr(service_module, "_conversation_service", None)
+    monkeypatch.delenv("OH_WEBHOOKS_0_BASE_URL", raising=False)  # ok if unset
+
+    fixtures_dir = Path(__file__).parents[1] / "fixtures" / "openai_gateway"
+    fixtures = [
+        json.loads((fixtures_dir / "openai_nano_completion.json").read_text()),
+        json.loads((fixtures_dir / "litellm_haiku_completion.json").read_text()),
+    ]
+
+    profiles_dir = tmp_path / "profiles"
+    store = LLMProfileStore(base_dir=profiles_dir)
+    for fixture in fixtures:  # one stored profile per fixture
+        store.save(
+            fixture["profile_name"],
+            LLM(model=fixture["backing_model"], api_key=SecretStr("unused")),
+            include_secrets=True,
+        )
+
+    async def start_conversation_with_test_llm(conversation_service, llm: TestLLM):
+        request = StartConversationRequest(
+            agent=Agent(
+                llm=LLM(model="gpt-4o-mini", api_key=SecretStr("unused")),
+                tools=[],
+            ),
+            workspace=LocalWorkspace(working_dir=str(tmp_path / "workspace")),
+            autotitle=False,
+        )
+        info, _ = await conversation_service.start_conversation(request)
+        event_service = await conversation_service.get_event_service(info.id)
+        assert event_service is not None
+        event_service.get_conversation().switch_llm(llm)  # swap in the TestLLM
+        return info.id
+
+    with patch(  # service's LLMProfileStore() now yields a store at profiles_dir
+        "openhands.agent_server.openai.service.LLMProfileStore",
+        lambda: LLMProfileStore(base_dir=profiles_dir),
+    ):
+        with live_server_env(tmp_path, monkeypatch) as env:
+            for fixture in fixtures:
+                expected_content = fixture["response"]["choices"][0]["message"][
+                    "content"
+                ]
+                llm = TestLLM.from_messages(  # scripted reply = fixture content
+                    [
+                        Message(
+                            role="assistant",
+                            content=[TextContent(text=expected_content)],
+                        )
+                    ],
+                    model=fixture["backing_model"],
+                    usage_id=f"frozen-{fixture['profile_name']}",
+                )
+                conversation_id = asyncio.run(  # fresh event loop per fixture
+                    start_conversation_with_test_llm(env["conversation_service"], llm)
+                )
+                client = OpenAI(
+                    api_key="unused",
+                    base_url=f"{env['host']}/v1",
+                    default_headers={  # target the pre-started conversation
+                        "X-OpenHands-ServerConversation-ID": str(conversation_id)
+                    },
+                    timeout=10,
+                )
+                completion = client.chat.completions.create(
+                    model=fixture["gateway_model"],
+                    messages=fixture["messages"],
+                )
+
+                assert completion.model == fixture["gateway_model"]
+                assert completion.choices[0].message.content == expected_content
+                assert llm.call_count == 1  # the swapped-in TestLLM answered
+
+
 @pytest.mark.skipif(
     sys.platform == "win32",
     reason="The live bash endpoint depends on the Unix terminal backend.",
diff --git a/tests/fixtures/openai_gateway/litellm_haiku_completion.json b/tests/fixtures/openai_gateway/litellm_haiku_completion.json
new file mode 100644
index 0000000000..d1d89cc1f7
--- /dev/null
+++ b/tests/fixtures/openai_gateway/litellm_haiku_completion.json
@@ -0,0 +1,46 @@
+{
+  "backing_model": "litellm_proxy/anthropic/claude-haiku-4-5-20251001",
+  "gateway_model": "openhands_haiku_eval_proxy",
+  "messages": [
+    {
+      "content": "Answer directly and do not use tools.",
+      "role": "system"
+    },
+    {
+      "content": "Write a haiku about a software agent opening an OpenAI-compatible gateway.",
+      "role": "user"
+    }
+  ],
+  "profile_name": "haiku_eval_proxy",
+  "response": {
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "annotations": null,
+          "audio": null,
+          "content": "Agent stirs to life\nOpens gateway to the minds\nAI streams begin",
+          "function_call": null,
+          "refusal": null,
+          "role": "assistant",
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780752884,
+    "id": "chatcmpl-cbc4bc5e7d2d4ef6b3c3ed3e9bf9fdb9",
+    "model": "openhands_haiku_eval_proxy",
+    "object": "chat.completion",
+    "service_tier": null,
+    "system_fingerprint": null,
+    "usage": {
+      "completion_tokens": 491,
+      "completion_tokens_details": null,
+      "prompt_tokens": 4965,
+      "prompt_tokens_details": null,
+      "total_tokens": 5456
+    }
+  }
+}
diff --git a/tests/fixtures/openai_gateway/openai_nano_completion.json b/tests/fixtures/openai_gateway/openai_nano_completion.json
new file mode 100644
index 0000000000..cb1e304038
--- /dev/null
+++ b/tests/fixtures/openai_gateway/openai_nano_completion.json
@@ -0,0 +1,46 @@
+{
+  "backing_model": "gpt-5-nano",
+  "gateway_model": "openhands_openai_nano",
+  "messages": [
+    {
+      "content": "Answer directly and do not use tools.",
+      "role": "system"
+    },
+    {
+      "content": "In one sentence, explain what an OpenAI-compatible agent-server gateway does.",
+      "role": "user"
+    }
+  ],
+  "profile_name": "openai_nano",
+  "response": {
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "logprobs": null,
+        "message": {
+          "annotations": null,
+          "audio": null,
+          "content": "An OpenAI-compatible agent-server gateway is a bridge that exposes an OpenAI-style API to clients, translating prompts into the agent\u2019s actions and returning the results in the familiar OpenAI chat/completion format.",
+          "function_call": null,
+          "refusal": null,
+          "role": "assistant",
+          "tool_calls": null
+        }
+      }
+    ],
+    "created": 1780752878,
+    "id": "chatcmpl-a948351964a24d2191092e150033b594",
+    "model": "openhands_openai_nano",
+    "object": "chat.completion",
+    "service_tier": null,
+    "system_fingerprint": null,
+    "usage": {
+      "completion_tokens": 818,
+      "completion_tokens_details": null,
+      "prompt_tokens": 3914,
+      "prompt_tokens_details": null,
+      "total_tokens": 4732
+    }
+  }
+}
diff --git a/uv.lock b/uv.lock
index 463163b322..b47f74641c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2465,6 +2465,7 @@ dependencies = [
     { name = "alembic" },
     { name = "docker" },
     { name = "fastapi" },
+    { name = "openai" },
     { name = "openhands-sdk" },
     { name = "pydantic" },
     { name = "sqlalchemy" },
@@ -2479,6 +2480,7 @@ requires-dist = [
     { name = "alembic", specifier = ">=1.13" },
     { name = "docker", specifier = ">=7.1,<8" },
     { name = "fastapi", specifier = ">=0.104" },
+    { name = "openai", specifier = ">=2.33.0,<3" },
     { name = "openhands-sdk", editable = "openhands-sdk" },
     { name = "pydantic", specifier = ">=2" },
     { name = "sqlalchemy", specifier = ">=2" },