diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py index 1ff882a20f..067fc02aec 100644 --- a/openhands-agent-server/openhands/agent_server/api.py +++ b/openhands-agent-server/openhands/agent_server/api.py @@ -39,6 +39,7 @@ from openhands.agent_server.hooks_router import hooks_router from openhands.agent_server.llm_router import llm_router from openhands.agent_server.mcp_router import mcp_router +from openhands.agent_server.meta_profiles_router import meta_profiles_router from openhands.agent_server.middleware import CORSDispatcher from openhands.agent_server.openai.router import ( create_openai_api_key_dependency, @@ -316,6 +317,7 @@ def _add_api_routes(app: FastAPI, config: Config) -> None: api_router.include_router(settings_router) api_router.include_router(workspaces_router) api_router.include_router(profiles_router) + api_router.include_router(meta_profiles_router) # /api/auth/* mints workspace cookies and requires the header to bootstrap, # so it lives under the header-only auth group. api_router.include_router(auth_router) diff --git a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py new file mode 100644 index 0000000000..251ac3d3e1 --- /dev/null +++ b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py @@ -0,0 +1,238 @@ +"""HTTP CRUD + activate endpoints for meta-profiles (mirrors profiles_router). + +Unlike LLM profiles, meta-profiles hold no secrets — they are plain JSON +documents persisted via :class:`MetaProfileStore`. +""" + +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Annotated + +from fastapi import APIRouter, HTTPException, Path, Request, status +from pydantic import BaseModel + +from openhands.agent_server._secrets_exposure import get_config +from openhands.agent_server.persistence import ( + PersistedSettings, + get_settings_store, +) +from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_PATTERN +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileLimitExceeded, + MetaProfileStore, +) +from openhands.sdk.logger import get_logger + + +logger = get_logger(__name__) + +meta_profiles_router = APIRouter(prefix="/meta-profiles", tags=["Meta-profiles"]) + +MAX_META_PROFILES = 50 + +MetaProfileName = Annotated[ + str, + Path(min_length=1, max_length=64, pattern=PROFILE_NAME_PATTERN), +] + + +class MetaProfileInfo(BaseModel): + name: str + classifier_model: str | None = None + default_model: str | None = None + num_classes: int = 0 + + +class MetaProfileListResponse(BaseModel): + meta_profiles: list[MetaProfileInfo] + active_meta_profile: str | None = None + + +class MetaProfileDetailResponse(BaseModel): + name: str + config: MetaProfile + + +class MetaProfileMutationResponse(BaseModel): + name: str + message: str + + +class ActivateMetaProfileResponse(BaseModel): + name: str + message: str + + +@contextmanager +def _store_errors() -> Iterator[None]: + """Map ``MetaProfileStore`` errors to HTTP responses.""" + try: + yield + except TimeoutError: + # save()/delete() can raise TimeoutError from the file lock under + # contention; surface a retryable 503 instead of a generic 500 + # (mirrors profiles_router._store_errors()). + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Meta-profile store is busy. Please retry.", + ) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + +def _set_active_meta_profile_if_matches( + request: Request, old_name: str, new_name: str | None +) -> bool: + config = get_config(request) + settings_store = get_settings_store(config) + settings = settings_store.load() or PersistedSettings() + if settings.active_meta_profile != old_name: + return False + + def update_active(settings: PersistedSettings) -> PersistedSettings: + # Route through PersistedSettings.update() so the change also + # propagates into agent_settings (active_meta_profile + + # enable_classify_and_switch_llm_tool); a direct field assignment + # would leave that nested state stale. + settings.update({"active_meta_profile": new_name}) + return settings + + settings_store.update(update_active) + return True + + +@meta_profiles_router.get("", response_model=MetaProfileListResponse) +async def list_meta_profiles(request: Request) -> MetaProfileListResponse: + """List all saved meta-profiles and the currently active one.""" + config = get_config(request) + settings_store = get_settings_store(config) + settings = settings_store.load() or PersistedSettings() + + store = MetaProfileStore() + with _store_errors(): + summaries = store.list_summaries() + + return MetaProfileListResponse( + meta_profiles=[MetaProfileInfo(**s) for s in summaries], + active_meta_profile=settings.active_meta_profile, + ) + + +@meta_profiles_router.get("/{name}", response_model=MetaProfileDetailResponse) +async def get_meta_profile(name: MetaProfileName) -> MetaProfileDetailResponse: + """Get a meta-profile's full configuration.""" + store = MetaProfileStore() + try: + with _store_errors(): + meta_profile = store.load(name) + except FileNotFoundError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Meta-profile '{name}' not found", + ) + + return MetaProfileDetailResponse(name=name, config=meta_profile) + + +@meta_profiles_router.post( + "/{name}", + response_model=MetaProfileMutationResponse, + status_code=status.HTTP_201_CREATED, +) +async def save_meta_profile( + name: MetaProfileName, + body: MetaProfile, +) -> MetaProfileMutationResponse: + """Save (create or overwrite) a meta-profile. + + Returns 409 if creating a new meta-profile would exceed + ``MAX_META_PROFILES``. + """ + store = MetaProfileStore() + try: + with _store_errors(): + store.save(name, body, max_profiles=MAX_META_PROFILES) + except MetaProfileLimitExceeded: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=( + f"Meta-profile limit reached ({MAX_META_PROFILES}). " + "Delete a meta-profile before saving a new one." + ), + ) + + logger.info(f"Saved meta-profile '{name}'") + return MetaProfileMutationResponse( + name=name, message=f"Meta-profile '{name}' saved" + ) + + +@meta_profiles_router.delete("/{name}", response_model=MetaProfileMutationResponse) +async def delete_meta_profile( + request: Request, name: MetaProfileName +) -> MetaProfileMutationResponse: + """Delete a meta-profile (idempotent). + + If the deleted meta-profile is the active one, ``active_meta_profile`` is + cleared. + """ + store = MetaProfileStore() + with _store_errors(): + store.delete(name) + if _set_active_meta_profile_if_matches(request, name, None): + logger.info(f"Cleared active_meta_profile for deleted meta-profile '{name}'") + logger.info(f"Deleted meta-profile '{name}'") + return MetaProfileMutationResponse( + name=name, message=f"Meta-profile '{name}' deleted" + ) + + +@meta_profiles_router.post( + "/{name}/activate", response_model=ActivateMetaProfileResponse +) +async def activate_meta_profile( + request: Request, name: MetaProfileName +) -> ActivateMetaProfileResponse: + """Activate a meta-profile by recording it as ``active_meta_profile``. + + Unlike LLM profiles, activating a meta-profile does not mutate the agent's + LLM config — it only records which meta-profile the + ``classify_and_switch_llm`` tool should route with. Returns 404 if the + meta-profile does not exist. + """ + # Verify the meta-profile exists (and is valid) before activating. + store = MetaProfileStore() + try: + with _store_errors(): + store.load(name) + except FileNotFoundError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Meta-profile '{name}' not found", + ) + + config = get_config(request) + settings_store = get_settings_store(config) + + def apply_active(settings: PersistedSettings) -> PersistedSettings: + # Route through PersistedSettings.update() so activation also wires + # agent_settings (active_meta_profile + enable_classify_and_switch_llm_tool), + # which is what actually attaches the routing tool. A direct field + # assignment would record the active name but never enable the tool. + settings.update({"active_meta_profile": name}) + return settings + + try: + settings_store.update(apply_active) + except (OSError, PermissionError): + logger.error("Failed to activate meta-profile - file I/O error") + raise HTTPException(status_code=500, detail="Failed to activate meta-profile") + + logger.info(f"Activated meta-profile '{name}'") + return ActivateMetaProfileResponse( + name=name, message=f"Meta-profile '{name}' activated" + ) diff --git a/openhands-agent-server/openhands/agent_server/persistence/models.py b/openhands-agent-server/openhands/agent_server/persistence/models.py index e5c1b6f714..2201c48051 100644 --- a/openhands-agent-server/openhands/agent_server/persistence/models.py +++ b/openhands-agent-server/openhands/agent_server/persistence/models.py @@ -55,6 +55,7 @@ class SettingsUpdatePayload(TypedDict, total=False): conversation_settings_diff: dict[str, Any] misc_settings_diff: dict[str, Any] active_profile: str | None + active_meta_profile: str | None def _deep_merge( @@ -140,6 +141,11 @@ class PersistedSettings(BaseModel): default=None, description="Name of the currently active LLM profile.", ) + active_meta_profile: str | None = Field( + default=None, + description="Name of the currently active meta-profile used for " + "intelligent model routing.", + ) misc_settings: dict[str, Any] = Field( default_factory=dict, description=( @@ -248,11 +254,63 @@ def update(self, payload: SettingsUpdatePayload) -> None: # Update active_profile if explicitly provided (including None to clear) if "active_profile" in payload: self.active_profile = payload["active_profile"] + + # Update active_meta_profile if explicitly provided (incl. None) + if "active_meta_profile" in payload: + self._apply_active_meta_profile(payload["active_meta_profile"]) + + # Enforce the invariant even when only ``agent_settings`` changed: + # switching to an agent variant that cannot attach the routing tool + # (e.g. ACP) must not leave a stale top-level ``active_meta_profile`` + # claiming routing is active. (No-op when active already matches.) + self._clear_active_meta_profile_if_unsupported() finally: # Clear conv_merged to minimize plaintext exposure window if conv_merged is not None: conv_merged.clear() + def _agent_supports_routing(self) -> bool: + """Whether the current agent variant can attach the routing tool. + + OpenHands agent settings expose ``active_meta_profile`` / + ``enable_classify_and_switch_llm_tool``; ACP (and any other) variants do + not and never attach :class:`ClassifyAndSwitchLLMTool`. + """ + return "active_meta_profile" in type(self.agent_settings).model_fields + + def _apply_active_meta_profile(self, name: str | None) -> None: + """Set ``active_meta_profile`` and propagate it into agent_settings. + + Propagating into the nested ``agent_settings`` is what enables/uses the + routing tool on the agent built from these settings (mirrors how + activating a profile bakes the LLM into ``agent_settings``). + + The top-level field is a strict facade for the nested state, not a + best-effort hint: if the current agent variant cannot attach the routing + tool (e.g. ACP), the request is dropped and the facade cleared so + persisted state never claims an active router no conversation can use. + """ + if not self._agent_supports_routing(): + self.active_meta_profile = None + return + self.active_meta_profile = name + self.agent_settings = self.agent_settings.model_copy( + update={ + "active_meta_profile": name, + "enable_classify_and_switch_llm_tool": name is not None, + } + ) + + def _clear_active_meta_profile_if_unsupported(self) -> None: + """Clear the facade when the agent variant cannot support routing. + + Guards the agent-kind-switch path: changing ``agent_settings`` to a + non-routing variant (e.g. ACP) while a meta-profile is active would + otherwise leave the top-level ``active_meta_profile`` stale. + """ + if not self._agent_supports_routing() and self.active_meta_profile is not None: + self.active_meta_profile = None + @classmethod def from_persisted( cls, data: Any, *, context: dict[str, Any] | None = None diff --git a/openhands-agent-server/openhands/agent_server/settings_router.py b/openhands-agent-server/openhands/agent_server/settings_router.py index 5c3b410a58..4876cd2bf4 100644 --- a/openhands-agent-server/openhands/agent_server/settings_router.py +++ b/openhands-agent-server/openhands/agent_server/settings_router.py @@ -161,6 +161,7 @@ async def get_settings(request: Request) -> SettingsResponse: ), llm_api_key_is_set=settings.llm_api_key_is_set, active_profile=settings.active_profile, + active_meta_profile=settings.active_meta_profile, misc_settings=settings.misc_settings, ) @@ -206,6 +207,8 @@ async def update_settings( update_data = payload.model_dump(exclude_none=True) if "active_profile" in payload.model_fields_set: update_data["active_profile"] = payload.active_profile + if "active_meta_profile" in payload.model_fields_set: + update_data["active_meta_profile"] = payload.active_meta_profile if not update_data: # No updates provided - this is a client error raise HTTPException( @@ -213,7 +216,7 @@ async def update_settings( detail=( "At least one of agent_settings_diff, " "conversation_settings_diff, misc_settings_diff, " - "or active_profile must be provided" + "active_profile, or active_meta_profile must be provided" ), ) @@ -269,6 +272,7 @@ def apply_update(settings: PersistedSettings) -> PersistedSettings: conversation_settings=settings.conversation_settings.model_dump(mode="json"), llm_api_key_is_set=settings.llm_api_key_is_set, active_profile=settings.active_profile, + active_meta_profile=settings.active_meta_profile, misc_settings=settings.misc_settings, ) diff --git a/openhands-sdk/openhands/sdk/llm/__init__.py b/openhands-sdk/openhands/sdk/llm/__init__.py index 1a823532a9..bd42203fb7 100644 --- a/openhands-sdk/openhands/sdk/llm/__init__.py +++ b/openhands-sdk/openhands/sdk/llm/__init__.py @@ -19,6 +19,12 @@ ThinkingBlock, content_to_str, ) +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileClass, + MetaProfileLimitExceeded, + MetaProfileStore, +) from openhands.sdk.llm.router import RouterLLM from openhands.sdk.llm.streaming import ( AsyncTokenCallbackType, @@ -46,6 +52,10 @@ "LLM_PROFILE_SCHEMA_VERSION", "LLMRegistry", "LLMProfileStore", + "MetaProfile", + "MetaProfileClass", + "MetaProfileLimitExceeded", + "MetaProfileStore", "RouterLLM", "RegistryEvent", # Messages diff --git a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py new file mode 100644 index 0000000000..178aa9be6c --- /dev/null +++ b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py @@ -0,0 +1,232 @@ +"""JSON-file storage for meta-profiles under ``~/.openhands/meta-profiles``. + +Key invariant: every model reference in a meta-profile (``classifier_model``, +``default_model``, and each class's ``model``) is the *name of a saved LLM +profile* in :class:`~openhands.sdk.llm.llm_profile_store.LLMProfileStore`, not a +raw model string — credentials/provider settings resolve through that store. +""" + +from __future__ import annotations + +import json +import tempfile +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path +from typing import Any, Final + +from filelock import FileLock, Timeout +from pydantic import BaseModel, Field + +from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_REGEX +from openhands.sdk.logger import get_logger + + +_DEFAULT_META_PROFILE_DIR: Final[Path] = Path.home() / ".openhands" / "meta-profiles" +_LOCK_TIMEOUT_SECONDS: Final[float] = 30.0 + +logger = get_logger(__name__) + + +class MetaProfileLimitExceeded(Exception): + """Raised when saving would exceed the configured meta-profile limit.""" + + +class MetaProfileClass(BaseModel): + """A single task category and the LLM profile that should handle it.""" + + description: str = Field( + description="Natural-language description of the kind of task this " + "class covers (e.g. 'task is UI oriented or requires looking at images')." + ) + model: str = Field( + description="Name of the saved LLM profile to switch to for tasks " + "matching this class." + ) + + +class MetaProfile(BaseModel): + """A declarative model-routing configuration.""" + + classifier_model: str = Field( + description="Name of the saved LLM profile used to classify the task." + ) + default_model: str = Field( + description="Name of the saved LLM profile to use when no class matches." + ) + classes: list[MetaProfileClass] = Field( + default_factory=list, + description="Ordered list of task classes and their target profiles.", + ) + + +class MetaProfileStore: + """Read meta-profiles from ``~/.openhands/meta-profiles`` (by default).""" + + def __init__(self, base_dir: Path | str | None = None) -> None: + """Initialize the meta-profile store. + + Args: + base_dir: Directory where meta-profiles are stored. Defaults to + ``~/.openhands/meta-profiles`` when ``None``. + """ + self.base_dir = ( + Path(base_dir) if base_dir is not None else _DEFAULT_META_PROFILE_DIR + ) + self.base_dir.mkdir(parents=True, exist_ok=True) + self._file_lock = FileLock(self.base_dir / ".meta-profiles.lock") + + @contextmanager + def _acquire_lock(self, timeout: float = _LOCK_TIMEOUT_SECONDS) -> Iterator[None]: + """Acquire the file lock for safe concurrent access. + + The lock is reentrant within a process, so methods that hold it may + call other locked methods (e.g. ``save`` calls ``list``). + + Raises: + TimeoutError: If the lock cannot be acquired within ``timeout``. + """ + try: + with self._file_lock.acquire(timeout=timeout): + yield + except Timeout: + logger.error( + f"[Meta-profile Store] Failed to acquire lock within {timeout}s" + ) + raise TimeoutError( + f"Meta-profile store lock acquisition timed out after {timeout}s" + ) + + def list(self) -> list[str]: + """Return stored meta-profile names (without ``.json``), sorted. + + The order is alphabetical, not chronological. Callers that fall back to + "the first available" meta-profile (when none is explicitly active) get + the alphabetically-first name, which is stable but not "most recent". + """ + return sorted( + p.stem + for p in self.base_dir.glob("*.json") + if PROFILE_NAME_REGEX.match(p.stem) + ) + + def _get_path(self, name: str) -> Path: + clean_name = name.removesuffix(".json") + if not PROFILE_NAME_REGEX.match(clean_name): + raise ValueError( + f"Invalid meta-profile name: {name!r}. " + "Names must be 1-64 characters, start with a letter or digit, " + "and contain only letters, digits, '.', '_', or '-'." + ) + return self.base_dir / f"{clean_name}.json" + + def load(self, name: str) -> MetaProfile: + """Load a meta-profile by name. + + Raises: + FileNotFoundError: If the meta-profile does not exist. + ValueError: If the file is corrupted or fails validation. + """ + path = self._get_path(name) + if not path.exists(): + existing = self.list() + raise FileNotFoundError( + f"Meta-profile `{name}` not found. " + f"Available meta-profiles: {', '.join(existing) or 'none'}" + ) + try: + data = json.loads(path.read_text(encoding="utf-8")) + return MetaProfile.model_validate(data) + except Exception as e: + raise ValueError(f"Failed to load meta-profile `{name}`: {e}") from e + + def save( + self, + name: str, + meta_profile: MetaProfile, + *, + max_profiles: int | None = None, + ) -> None: + """Persist a meta-profile under ``name`` (atomic write, overwrites). + + Args: + name: Name of the meta-profile to save. + meta_profile: The meta-profile to persist. + max_profiles: Optional cap on the number of meta-profiles. When set, + raises :class:`MetaProfileLimitExceeded` if creating a *new* + meta-profile would exceed the limit. + + Raises: + MetaProfileLimitExceeded: If ``max_profiles`` would be exceeded. + ValueError: If ``name`` is not a valid meta-profile name. + """ + path = self._get_path(name) + + # Hold the lock across the precondition check and the atomic replace so + # concurrent creators cannot both pass the limit check and overshoot + # ``max_profiles`` (TOCTOU), mirroring ``LLMProfileStore``. + with self._acquire_lock(): + if max_profiles is not None and not path.exists(): + if len(self.list()) >= max_profiles: + raise MetaProfileLimitExceeded( + f"Meta-profile limit reached ({max_profiles})." + ) + + profile_json = json.dumps(meta_profile.model_dump(mode="json"), indent=2) + with tempfile.NamedTemporaryFile( + mode="w", + dir=self.base_dir, + suffix=".tmp", + delete=False, + encoding="utf-8", + ) as tmp: + tmp.write(profile_json) + tmp_path = Path(tmp.name) + + try: + Path.replace(tmp_path, path) + except Exception: + tmp_path.unlink(missing_ok=True) + raise + logger.info(f"Saved meta-profile `{name}` at {path}") + + def delete(self, name: str) -> None: + """Delete a meta-profile (idempotent — missing names are a no-op). + + Raises: + ValueError: If ``name`` is not a valid meta-profile name. + """ + path = self._get_path(name) + with self._acquire_lock(): + if not path.exists(): + logger.info(f"Meta-profile `{name}` not found. Skipping delete.") + return + path.unlink() + logger.info(f"Deleted meta-profile `{name}`") + + def list_summaries(self) -> list[dict[str, Any]]: + """List meta-profile metadata without full schema validation. + + Files with corrupted JSON or non-dict top-level values are skipped with + a warning so a single bad file never breaks the listing. + """ + summaries: list[dict[str, Any]] = [] + for name in self.list(): + try: + data = json.loads(self._get_path(name).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as e: + logger.warning(f"Skipping corrupted meta-profile {name!r}: {e}") + continue + if not isinstance(data, dict): + logger.warning(f"Skipping non-dict meta-profile {name!r}") + continue + classes = data.get("classes") or [] + summaries.append( + { + "name": name, + "classifier_model": data.get("classifier_model"), + "default_model": data.get("default_model"), + "num_classes": len(classes) if isinstance(classes, list) else 0, + } + ) + return summaries diff --git a/openhands-sdk/openhands/sdk/settings/api_models.py b/openhands-sdk/openhands/sdk/settings/api_models.py index 52504941fe..247c96aee2 100644 --- a/openhands-sdk/openhands/sdk/settings/api_models.py +++ b/openhands-sdk/openhands/sdk/settings/api_models.py @@ -69,6 +69,10 @@ class SettingsResponse(BaseModel): default=None, description="Name of the currently active LLM profile, if one is selected.", ) + active_meta_profile: str | None = Field( + default=None, + description="Name of the currently active meta-profile, if one is selected.", + ) misc_settings: dict[str, Any] = Field(default_factory=dict) def get_agent_settings(self) -> AgentSettingsConfig: @@ -113,6 +117,11 @@ class SettingsUpdateRequest(BaseModel): pattern=PROFILE_NAME_PATTERN, description="Name of the active LLM profile to persist; null clears it.", ) + active_meta_profile: str | None = Field( + default=None, + pattern=PROFILE_NAME_PATTERN, + description="Name of the active meta-profile to persist; null clears it.", + ) # ── Secrets API Models ──────────────────────────────────────────────────── diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 624aa9402e..b3da4fd23d 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1025,6 +1025,35 @@ class OpenHandsAgentSettings(AgentSettingsBase): ).model_dump() }, ) + enable_classify_and_switch_llm_tool: bool = Field( + default=False, + description=( + "Enable the built-in classify_and_switch_llm tool, which routes the " + "task to the best LLM profile using the active meta-profile. When no " + "active_meta_profile is set, the first available meta-profile is used." + ), + json_schema_extra={ + SETTINGS_METADATA_KEY: SettingsFieldMetadata( + label="Enable intelligent model routing tool", + prominence=SettingProminence.MINOR, + variant="openhands", + ).model_dump() + }, + ) + active_meta_profile: str | None = Field( + default=None, + description=( + "Name of the active meta-profile (in ~/.openhands/meta-profiles) used " + "by the classify_and_switch_llm tool to route tasks to LLM profiles." + ), + json_schema_extra={ + SETTINGS_METADATA_KEY: SettingsFieldMetadata( + label="Active meta-profile", + prominence=SettingProminence.MINOR, + variant="openhands", + ).model_dump() + }, + ) tool_concurrency_limit: int = Field( default=1, ge=1, @@ -1119,7 +1148,12 @@ def create_agent(self) -> Agent: """ from openhands.sdk.agent import Agent from openhands.sdk.llm.auth.openai import create_subscription_llm_from_config - from openhands.sdk.tool.builtins import BUILT_IN_TOOLS, SwitchLLMTool + from openhands.sdk.tool import Tool + from openhands.sdk.tool.builtins import ( + BUILT_IN_TOOLS, + ClassifyAndSwitchLLMTool, + SwitchLLMTool, + ) # Bypass ``_serialize_mcp_config``: MCP servers need real env/headers. mcp_config = ( @@ -1131,11 +1165,24 @@ def create_agent(self) -> Agent: if self.enable_switch_llm_tool: include_default_tools.append(SwitchLLMTool.__name__) + # The routing tool needs the active meta-profile name, which the + # name-only ``include_default_tools`` path cannot pass, so add it as a + # ``Tool`` spec carrying the param. When no meta-profile is active, the + # tool falls back to the first available one, so we still wire it. + tools = list(self.tools) + if self.enable_classify_and_switch_llm_tool: + params = ( + {"active_meta_profile": self.active_meta_profile} + if self.active_meta_profile + else {} + ) + tools.append(Tool(name=ClassifyAndSwitchLLMTool.__name__, params=params)) + llm = create_subscription_llm_from_config(self.llm) condenser = None if llm.is_subscription else self.build_condenser(llm) return Agent( llm=llm, - tools=self.tools, + tools=tools, mcp_config=mcp_config, include_default_tools=include_default_tools, agent_context=self.agent_context, diff --git a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py index 15dbf75e67..abc6e12f6e 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py @@ -5,6 +5,12 @@ For tools that require interacting with the environment, add them to `openhands-tools`. """ +from openhands.sdk.tool.builtins.classify_and_switch_llm import ( + ClassifyAndSwitchLLMAction, + ClassifyAndSwitchLLMExecutor, + ClassifyAndSwitchLLMObservation, + ClassifyAndSwitchLLMTool, +) from openhands.sdk.tool.builtins.finish import ( FinishAction, FinishExecutor, @@ -43,11 +49,16 @@ **{tool.__name__: tool for tool in BUILT_IN_TOOLS}, InvokeSkillTool.__name__: InvokeSkillTool, SwitchLLMTool.__name__: SwitchLLMTool, + ClassifyAndSwitchLLMTool.__name__: ClassifyAndSwitchLLMTool, } __all__ = [ "BUILT_IN_TOOLS", "BUILT_IN_TOOL_CLASSES", + "ClassifyAndSwitchLLMTool", + "ClassifyAndSwitchLLMAction", + "ClassifyAndSwitchLLMObservation", + "ClassifyAndSwitchLLMExecutor", "FinishTool", "FinishAction", "FinishObservation", diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py new file mode 100644 index 0000000000..5d0e1504d2 --- /dev/null +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -0,0 +1,346 @@ +"""Built-in tool that classifies the current task and switches LLM profile. + +The tool reads the *active meta-profile* (see +:class:`~openhands.sdk.llm.meta_profile_store.MetaProfile`), asks the +meta-profile's ``classifier_model`` to categorize the current task using the +last few conversation messages, and switches the conversation to the LLM +profile mapped to the chosen class (falling back to ``default_model``). +""" + +import re +from collections.abc import Sequence +from typing import TYPE_CHECKING, Self + +from pydantic import Field +from rich.text import Text + +from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore +from openhands.sdk.logger import get_logger +from openhands.sdk.tool.registry import register_tool +from openhands.sdk.tool.tool import ( + Action, + Observation, + ToolAnnotations, + ToolDefinition, + ToolExecutor, +) + + +if TYPE_CHECKING: + from openhands.sdk.conversation.impl.local_conversation import LocalConversation + from openhands.sdk.conversation.state import ConversationState + + +logger = get_logger(__name__) + +# Number of trailing conversation messages shown to the classifier. +_RECENT_MESSAGE_LIMIT = 6 + + +class ClassifyAndSwitchLLMAction(Action): + """Trigger classification of the current task and switch the LLM profile.""" + + @property + def visualize(self) -> Text: + content = Text() + content.append("Classify task and switch LLM", style="bold magenta") + return content + + +class ClassifyAndSwitchLLMObservation(Observation): + """Result of classifying the task and switching the LLM profile.""" + + chosen_class: str | None = Field( + default=None, + description="Description of the matched class, or None when the " + "default model was used.", + ) + model: str | None = Field( + default=None, + description="Name of the LLM profile that was activated.", + ) + active_model: str | None = Field( + default=None, + description="Model configured by the activated profile, when available.", + ) + + @property + def visualize(self) -> Text: + content = Text() + if self.is_error: + content.append("Failed to classify and switch LLM", style="bold red") + else: + content.append("Classified task and switched LLM", style="bold green") + if self.model: + content.append(f": {self.model}") + if self.active_model: + content.append(f" ({self.active_model})") + if self.chosen_class: + content.append("\nMatched class: ", style="bold") + content.append(self.chosen_class) + return content + + +def build_classifier_prompt(meta: MetaProfile) -> str: + """Build the classifier system prompt listing the meta-profile classes.""" + lines = [ + "You are a model-routing classifier. Based on the recent conversation, " + "pick the single category that best describes the current task.", + "", + "Categories:", + ] + for i, cls in enumerate(meta.classes, start=1): + lines.append(f"{i}. {cls.description}") + lines.append("") + lines.append( + "Respond with ONLY the number of the best matching category. " + "If none of the categories clearly apply, respond with 0." + ) + return "\n".join(lines) + + +def parse_class_index(text: str, num_classes: int) -> int: + """Parse the classifier reply into a class index. + + Returns 0 (use ``default_model``) when no in-range integer is found. + """ + match = re.search(r"-?\d+", text) + if match is None: + return 0 + index = int(match.group()) + if 1 <= index <= num_classes: + return index + return 0 + + +def _recent_messages_text( + conversation: "LocalConversation", limit: int = _RECENT_MESSAGE_LIMIT +) -> str: + """Return a transcript of the last ``limit`` conversation messages. + + Only ``MessageEvent`` is included on purpose: user/assistant messages are + the task signal the classifier needs. Action/observation events (tool calls, + outputs) are deliberately excluded — they are noisy, can be large, and don't + describe the task any better than the surrounding messages. + """ + from openhands.sdk.event.llm_convertible.message import MessageEvent + from openhands.sdk.llm import content_to_str + + messages: list[str] = [] + for event in conversation.state.events: + if not isinstance(event, MessageEvent): + continue + text = "\n".join(content_to_str(event.llm_message.content)).strip() + if text: + messages.append(f"{event.source}: {text}") + return "\n".join(messages[-limit:]) + + +class ClassifyAndSwitchLLMExecutor(ToolExecutor): + def __init__( + self, + meta_profile_store: MetaProfileStore, + active_meta_profile: str | None = None, + ) -> None: + # Resolve the meta-profile lazily (at invocation), not at construction, + # so a missing/renamed file under ~/.openhands/meta-profiles produces a + # tool error instead of breaking conversation startup. + self._store = meta_profile_store + self._active_meta_profile = active_meta_profile + + def _resolve_meta_profile(self) -> MetaProfile: + """Resolve the active meta-profile, falling back to the first available. + + Raises: + FileNotFoundError: If no meta-profile can be resolved. + ValueError: If the resolved meta-profile is invalid. + """ + name = self._active_meta_profile + if not name: + available = self._store.list() + if not available: + raise FileNotFoundError( + "No meta-profile is active and none are available in the " + "meta-profile store." + ) + # ``list()`` is alphabetically sorted, so this is the + # alphabetically-first meta-profile, not the most recently saved. + name = available[0] + logger.info( + "No active meta-profile set; falling back to first available: %r", + name, + ) + return self._store.load(name) + + def __call__( + self, + action: ClassifyAndSwitchLLMAction, # noqa: ARG002 + conversation: "LocalConversation | None" = None, + ) -> ClassifyAndSwitchLLMObservation: + from openhands.sdk.llm import Message, TextContent, content_to_str + + if conversation is None: + return ClassifyAndSwitchLLMObservation.from_text( + text="Cannot classify and switch LLM without an active conversation.", + is_error=True, + ) + + try: + meta = self._resolve_meta_profile() + except (FileNotFoundError, ValueError) as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Failed to resolve the active meta-profile: {exc}", + is_error=True, + ) + # 1) Load the classifier LLM (a saved profile), respecting at-rest cipher, + # and register it in the conversation's LLM registry under a stable + # usage id. Registration is what makes the classifier completion's + # tokens/cost flow into ``conversation.conversation_stats`` and count + # against ``max_budget_per_run`` — calling an unregistered LLM would + # spend off the books (mirrors the ``ask-agent-llm`` pattern). Caching + # by usage id means repeated routing calls reuse one metrics bucket. + usage_id = f"classifier:{meta.classifier_model}" + try: + classifier_llm = conversation.llm_registry.get(usage_id) + except KeyError: + try: + loaded = conversation._profile_store.load( + meta.classifier_model, cipher=conversation._cipher + ) + except (FileNotFoundError, ValueError) as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Failed to load classifier profile " + f"'{meta.classifier_model}': {exc}" + ), + is_error=True, + ) + classifier_llm = loaded.model_copy(update={"usage_id": usage_id}) + conversation.llm_registry.add(classifier_llm) + + # 2) Single classifier call over the recent conversation. + transcript = _recent_messages_text(conversation) or "(no messages yet)" + messages = [ + Message( + role="system", + content=[TextContent(text=build_classifier_prompt(meta))], + ), + Message( + role="user", + content=[ + TextContent( + text=f"Recent conversation:\n{transcript}\n\nCategory number:" + ) + ], + ), + ] + try: + response = classifier_llm.completion(messages) + except Exception as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Classifier call failed: {type(exc).__name__}: {exc}", + is_error=True, + ) + + reply = "\n".join(content_to_str(response.message.content)) + index = parse_class_index(reply, len(meta.classes)) + + # 3) Resolve the target profile (chosen class or default). + if index == 0: + target_profile = meta.default_model + chosen_class = None + else: + chosen = meta.classes[index - 1] + target_profile = chosen.model + chosen_class = chosen.description + + # 4) Switch the conversation to the target profile. + try: + conversation.switch_profile(target_profile) + except FileNotFoundError: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Target LLM profile '{target_profile}' was not found.", + is_error=True, + model=target_profile, + chosen_class=chosen_class, + ) + except Exception as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Failed to switch to LLM profile '{target_profile}': " + f"{type(exc).__name__}: {exc}" + ), + is_error=True, + model=target_profile, + chosen_class=chosen_class, + ) + + active_model = conversation.agent.llm.model + label = chosen_class or "default" + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Classified task as '{label}' and switched to LLM profile " + f"'{target_profile}' (model '{active_model}'). " + "Future agent steps will use this profile." + ), + chosen_class=chosen_class, + model=target_profile, + active_model=active_model, + ) + + +_DESCRIPTION = ( + "Classify the current task and switch this conversation to the most " + "suitable saved LLM profile.\n\n" + "Use this near the start of a task (or when the task changes) to route " + "the work to the best model. A classifier model inspects the recent " + "conversation and picks a category from the active meta-profile; the " + "conversation then switches to that category's LLM profile. The switch " + "takes effect on the next LLM call." +) + + +class ClassifyAndSwitchLLMTool( + ToolDefinition[ClassifyAndSwitchLLMAction, ClassifyAndSwitchLLMObservation] +): + """Tool that classifies the task and switches to a meta-profile's LLM.""" + + @classmethod + def create( + cls, + conv_state: "ConversationState | None" = None, # noqa: ARG003 + active_meta_profile: str | None = None, + meta_profile_store: MetaProfileStore | None = None, + **params, + ) -> Sequence[Self]: + if params: + raise ValueError( + "ClassifyAndSwitchLLMTool only accepts 'active_meta_profile' " + "and 'meta_profile_store'." + ) + + # Meta-profile resolution is deferred to invocation time (see + # ClassifyAndSwitchLLMExecutor) so user-managed files under + # ~/.openhands/meta-profiles cannot break conversation startup; a + # missing/invalid profile surfaces as a tool error instead. + store = meta_profile_store or MetaProfileStore() + return [ + cls( + description=_DESCRIPTION, + action_type=ClassifyAndSwitchLLMAction, + observation_type=ClassifyAndSwitchLLMObservation, + executor=ClassifyAndSwitchLLMExecutor(store, active_meta_profile), + annotations=ToolAnnotations( + readOnlyHint=False, + destructiveHint=False, + idempotentHint=False, + openWorldHint=False, + ), + ) + ] + + +# Registered so it can be resolved from a ``Tool`` spec carrying the +# ``active_meta_profile`` param (the built-in ``include_default_tools`` path +# cannot pass params). +register_tool(ClassifyAndSwitchLLMTool.__name__, ClassifyAndSwitchLLMTool) diff --git a/tests/agent_server/test_active_meta_profile.py b/tests/agent_server/test_active_meta_profile.py new file mode 100644 index 0000000000..6af08a9ed5 --- /dev/null +++ b/tests/agent_server/test_active_meta_profile.py @@ -0,0 +1,102 @@ +"""Tests for active_meta_profile persistence and propagation (Option A).""" + +from openhands.agent_server.persistence.models import PersistedSettings +from openhands.sdk.settings.model import ACPAgentSettings, OpenHandsAgentSettings + + +def test_update_sets_top_level_and_propagates_into_agent_settings() -> None: + settings = PersistedSettings() + + settings.update({"active_meta_profile": "balanced"}) + + assert settings.active_meta_profile == "balanced" + # OpenHandsAgentSettings (default) gains the routing tool config. + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + assert agent.enable_classify_and_switch_llm_tool is True + + +def test_update_clearing_disables_tool() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + settings.update({"active_meta_profile": None}) + + assert settings.active_meta_profile is None + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile is None + assert agent.enable_classify_and_switch_llm_tool is False + + +def test_update_without_active_meta_profile_leaves_it_unchanged() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + # An unrelated update must not reset the meta-profile. + settings.update({"active_profile": "fast"}) + + assert settings.active_meta_profile == "balanced" + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + + +def test_activate_on_acp_agent_does_not_set_facade() -> None: + """ACP agents cannot attach the routing tool, so the facade must stay clear. + + Otherwise ``GET /api/meta-profiles`` would report an active router that no + conversation built from the persisted ACP settings can actually use. + """ + settings = PersistedSettings(agent_settings=ACPAgentSettings()) + + settings.update({"active_meta_profile": "balanced"}) + + # Request dropped: the facade is not set and the variant is untouched. + assert settings.active_meta_profile is None + assert isinstance(settings.agent_settings, ACPAgentSettings) + + +def test_switching_to_acp_clears_active_meta_profile() -> None: + """Switching agent kind to ACP while a meta-profile is active clears it.""" + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + assert settings.active_meta_profile == "balanced" + + # Switch agent kind to ACP via an agent-settings diff (no meta-profile key). + settings.update({"agent_settings_diff": {"agent_kind": "acp"}}) + + assert settings.active_meta_profile is None + assert isinstance(settings.agent_settings, ACPAgentSettings) + + +def test_switching_back_to_openhands_can_reactivate() -> None: + """After an ACP round-trip, an OpenHands agent can activate routing again.""" + settings = PersistedSettings(agent_settings=ACPAgentSettings()) + settings.update({"active_meta_profile": "balanced"}) + assert settings.active_meta_profile is None + + # Switch back to an OpenHands agent and activate. + settings.update({"agent_settings_diff": {"agent_kind": "openhands"}}) + settings.update({"active_meta_profile": "balanced"}) + + assert settings.active_meta_profile == "balanced" + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + assert agent.enable_classify_and_switch_llm_tool is True + + +def test_active_meta_profile_round_trips_through_serialization() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + dumped = settings.model_dump(mode="json") + assert dumped["active_meta_profile"] == "balanced" + + reloaded = PersistedSettings.from_persisted(dumped) + assert reloaded.active_meta_profile == "balanced" + agent = reloaded.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py new file mode 100644 index 0000000000..9a76d411c4 --- /dev/null +++ b/tests/agent_server/test_meta_profiles_router.py @@ -0,0 +1,277 @@ +"""Tests for meta_profiles_router endpoints.""" + +import tempfile +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from openhands.agent_server.api import create_app +from openhands.agent_server.config import Config +from openhands.agent_server.persistence import get_settings_store, reset_stores +from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore +from openhands.sdk.settings.model import OpenHandsAgentSettings + + +@pytest.fixture +def temp_meta_profiles_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + meta_dir = Path(tmpdir) / "meta-profiles" + meta_dir.mkdir(parents=True, exist_ok=True) + yield meta_dir + + +@pytest.fixture +def temp_settings_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + settings_dir = Path(tmpdir) / "settings" + settings_dir.mkdir(parents=True, exist_ok=True) + yield settings_dir + + +@pytest.fixture +def client(temp_meta_profiles_dir, temp_settings_dir, monkeypatch): + reset_stores() + monkeypatch.setenv("OH_PERSISTENCE_DIR", str(temp_settings_dir)) + config = Config(static_files_path=None, session_api_keys=[], secret_key=None) + app = create_app(config) + with patch( + "openhands.agent_server.meta_profiles_router.MetaProfileStore", + lambda: MetaProfileStore(base_dir=temp_meta_profiles_dir), + ): + yield TestClient(app) + reset_stores() + + +@pytest.fixture +def store(temp_meta_profiles_dir): + return MetaProfileStore(base_dir=temp_meta_profiles_dir) + + +def _meta(classifier="minimax", default="gpt", classes=None) -> dict: + return MetaProfile.model_validate( + { + "classifier_model": classifier, + "default_model": default, + "classes": classes or [{"description": "UI", "model": "deepseek"}], + } + ).model_dump(mode="json") + + +# ── List ──────────────────────────────────────────────────────────────────── + + +def test_list_empty(client): + response = client.get("/api/meta-profiles") + assert response.status_code == 200 + body = response.json() + assert body["meta_profiles"] == [] + assert body["active_meta_profile"] is None + + +def test_list_returns_summaries(client, store): + store.save("balanced", MetaProfile.model_validate(_meta())) + response = client.get("/api/meta-profiles") + assert response.status_code == 200 + body = response.json() + assert body["meta_profiles"] == [ + { + "name": "balanced", + "classifier_model": "minimax", + "default_model": "gpt", + "num_classes": 1, + } + ] + + +# ── Get ────────────────────────────────────────────────────────────────────── + + +def test_get_meta_profile(client, store): + store.save("balanced", MetaProfile.model_validate(_meta())) + response = client.get("/api/meta-profiles/balanced") + assert response.status_code == 200 + body = response.json() + assert body["name"] == "balanced" + assert body["config"]["classifier_model"] == "minimax" + assert body["config"]["classes"][0]["model"] == "deepseek" + + +def test_get_missing_returns_404(client): + response = client.get("/api/meta-profiles/nope") + assert response.status_code == 404 + + +# ── Save ───────────────────────────────────────────────────────────────────── + + +def test_save_creates_meta_profile(client, store): + response = client.post("/api/meta-profiles/balanced", json=_meta()) + assert response.status_code == 201 + assert store.load("balanced").classifier_model == "minimax" + + +def test_save_overwrites(client, store): + client.post("/api/meta-profiles/p", json=_meta(classifier="a", default="b")) + response = client.post( + "/api/meta-profiles/p", json=_meta(classifier="c", default="d") + ) + assert response.status_code == 201 + assert store.load("p").classifier_model == "c" + + +def test_save_invalid_body_returns_422(client): + response = client.post("/api/meta-profiles/p", json={"default_model": "gpt"}) + assert response.status_code == 422 + + +def test_save_invalid_name_returns_422(client): + response = client.post("/api/meta-profiles/..bad..", json=_meta()) + assert response.status_code == 422 + + +def test_save_timeout_returns_503(client, monkeypatch): + """Save surfaces store lock TimeoutError as a retryable 503, not a 500.""" + + def boom(self, name, meta_profile, *, max_profiles=None): + raise TimeoutError("locked") + + monkeypatch.setattr(MetaProfileStore, "save", boom) + + response = client.post("/api/meta-profiles/anything", json=_meta()) + assert response.status_code == 503 + + +# ── Delete ─────────────────────────────────────────────────────────────────── + + +def test_delete_meta_profile(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + response = client.delete("/api/meta-profiles/p") + assert response.status_code == 200 + assert store.list() == [] + + +def test_delete_is_idempotent(client): + response = client.delete("/api/meta-profiles/nope") + assert response.status_code == 200 + + +def test_delete_timeout_returns_503(client, monkeypatch): + """Delete surfaces store lock TimeoutError as a retryable 503, not a 500.""" + + def boom(self, name): + raise TimeoutError("locked") + + monkeypatch.setattr(MetaProfileStore, "delete", boom) + + response = client.delete("/api/meta-profiles/anything") + assert response.status_code == 503 + + +def test_delete_active_clears_active(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + activate = client.post("/api/meta-profiles/p/activate") + assert activate.status_code == 200 + + client.delete("/api/meta-profiles/p") + + listed = client.get("/api/meta-profiles").json() + assert listed["active_meta_profile"] is None + + +def test_delete_active_clears_nested_agent_settings(client, store): + """Deleting the active meta-profile must clear the *nested* agent settings. + + Otherwise the routing tool stays enabled pointing at a now-deleted profile, + which breaks the next conversation. Guards against top-level/nested drift. + """ + store.save("p", MetaProfile.model_validate(_meta())) + client.post("/api/meta-profiles/p/activate") + + client.delete("/api/meta-profiles/p") + + persisted = get_settings_store().load() + assert persisted is not None + agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile is None + assert agent.enable_classify_and_switch_llm_tool is False + + +# ── Activate ───────────────────────────────────────────────────────────────── + + +def test_activate_sets_active_meta_profile(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + response = client.post("/api/meta-profiles/p/activate") + assert response.status_code == 200 + assert response.json()["name"] == "p" + + listed = client.get("/api/meta-profiles").json() + assert listed["active_meta_profile"] == "p" + + +def test_activate_propagates_into_agent_settings(client, store): + """Activation must wire the *nested* agent settings, not just the facade. + + ``enable_classify_and_switch_llm_tool`` / ``active_meta_profile`` on + ``agent_settings`` are what actually attach the routing tool; if only the + top-level field is set, activation reports success but does nothing. + """ + store.save("p", MetaProfile.model_validate(_meta())) + response = client.post("/api/meta-profiles/p/activate") + assert response.status_code == 200 + + persisted = get_settings_store().load() + assert persisted is not None + assert persisted.active_meta_profile == "p" + agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "p" + assert agent.enable_classify_and_switch_llm_tool is True + + +def test_activate_missing_returns_404(client): + response = client.post("/api/meta-profiles/nope/activate") + assert response.status_code == 404 + + +def test_concurrent_delete_and_activate_keep_settings_consistent(client, store): + """Racing delete(active) against activate(other) must not corrupt settings. + + ``delete`` clears the active meta-profile and ``activate`` sets a new one; + both route through the file-locked settings store. Whatever the interleaving, + the persisted state must stay consistent: the active profile is never the + just-deleted one, and the nested ``agent_settings`` never drifts from the + top-level ``active_meta_profile``. + """ + store.save("q", MetaProfile.model_validate(_meta())) + + for _ in range(15): + # Re-establish "p" as the active profile before each race. + store.save("p", MetaProfile.model_validate(_meta())) + client.post("/api/meta-profiles/p/activate") + + with ThreadPoolExecutor(max_workers=2) as pool: + futures = [ + pool.submit(client.delete, "/api/meta-profiles/p"), + pool.submit(client.post, "/api/meta-profiles/q/activate"), + ] + for future in futures: + assert future.result().status_code == 200 + + persisted = get_settings_store().load() + assert persisted is not None + active = persisted.active_meta_profile + # "p" was deleted, so it must never remain active; outcome is "q" or None + # depending on which write committed last. + assert active in (None, "q") + + agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + # Nested agent settings must agree with the top-level facade. + assert agent.active_meta_profile == active + assert agent.enable_classify_and_switch_llm_tool is (active is not None) diff --git a/tests/agent_server/test_settings_router.py b/tests/agent_server/test_settings_router.py index 01656eb3e2..820856707a 100644 --- a/tests/agent_server/test_settings_router.py +++ b/tests/agent_server/test_settings_router.py @@ -642,7 +642,7 @@ def test_patch_settings_empty_payload_returns_400(client_with_settings): assert response.json()["detail"] == ( "At least one of agent_settings_diff, " "conversation_settings_diff, misc_settings_diff, " - "or active_profile must be provided" + "active_profile, or active_meta_profile must be provided" ) diff --git a/tests/sdk/llm/test_meta_profile_store.py b/tests/sdk/llm/test_meta_profile_store.py new file mode 100644 index 0000000000..acc81fc452 --- /dev/null +++ b/tests/sdk/llm/test_meta_profile_store.py @@ -0,0 +1,188 @@ +import json +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path + +import pytest + +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileClass, + MetaProfileLimitExceeded, + MetaProfileStore, +) + + +def _write(base: Path, name: str, data: dict) -> None: + (base / f"{name}.json").write_text(json.dumps(data), encoding="utf-8") + + +VALID = { + "classifier_model": "minimax", + "default_model": "gpt", + "classes": [ + {"description": "UI / images", "model": "deepseek"}, + {"description": "research", "model": "gemini"}, + ], +} + + +def test_load_valid_meta_profile(tmp_path: Path) -> None: + _write(tmp_path, "balanced", VALID) + store = MetaProfileStore(base_dir=tmp_path) + + meta = store.load("balanced") + + assert isinstance(meta, MetaProfile) + assert meta.classifier_model == "minimax" + assert meta.default_model == "gpt" + assert [c.model for c in meta.classes] == ["deepseek", "gemini"] + + +def test_load_accepts_name_with_json_suffix(tmp_path: Path) -> None: + _write(tmp_path, "balanced", VALID) + store = MetaProfileStore(base_dir=tmp_path) + + assert store.load("balanced.json").classifier_model == "minimax" + + +def test_list_returns_sorted_valid_names(tmp_path: Path) -> None: + _write(tmp_path, "b", VALID) + _write(tmp_path, "a", VALID) + # A file with an invalid stem must be ignored by list(). + (tmp_path / ".hidden.json").write_text("{}", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + assert store.list() == ["a", "b"] + + +def test_load_missing_raises_file_not_found(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(FileNotFoundError): + store.load("nope") + + +def test_load_invalid_name_raises_value_error(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("../escape") + + +def test_load_corrupted_json_raises_value_error(tmp_path: Path) -> None: + (tmp_path / "broken.json").write_text("{not json", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("broken") + + +def test_load_schema_violation_raises_value_error(tmp_path: Path) -> None: + _write(tmp_path, "bad", {"default_model": "gpt"}) # missing classifier_model + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("bad") + + +def test_save_then_load_roundtrip(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + meta = MetaProfile( + classifier_model="minimax", + default_model="gpt", + classes=[MetaProfileClass(description="UI / images", model="deepseek")], + ) + + store.save("balanced", meta) + + loaded = store.load("balanced") + assert loaded.classifier_model == "minimax" + assert loaded.default_model == "gpt" + assert [c.model for c in loaded.classes] == ["deepseek"] + + +def test_save_overwrites_existing(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("p", MetaProfile(classifier_model="a", default_model="b")) + store.save("p", MetaProfile(classifier_model="c", default_model="d")) + + assert store.load("p").classifier_model == "c" + assert store.list() == ["p"] + + +def test_save_invalid_name_raises_value_error(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.save("../escape", MetaProfile(classifier_model="a", default_model="b")) + + +def test_save_respects_max_profiles(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("a", MetaProfile(classifier_model="a", default_model="b")) + + with pytest.raises(MetaProfileLimitExceeded): + store.save( + "b", MetaProfile(classifier_model="a", default_model="b"), max_profiles=1 + ) + + # Overwriting an existing one is still allowed at the limit. + store.save( + "a", MetaProfile(classifier_model="x", default_model="y"), max_profiles=1 + ) + assert store.load("a").classifier_model == "x" + + +def test_concurrent_saves_respect_max_profiles(tmp_path: Path) -> None: + """The file lock must serialize the limit check + write across threads. + + Without locking, concurrent creators can each pass the ``max_profiles`` + check before any write lands and overshoot the limit (TOCTOU). + """ + store = MetaProfileStore(base_dir=tmp_path) + limit = 10 + attempts = 30 + + def attempt(i: int) -> bool: + try: + store.save( + f"p{i:02d}", + MetaProfile(classifier_model="a", default_model="b"), + max_profiles=limit, + ) + return True + except MetaProfileLimitExceeded: + return False + + with ThreadPoolExecutor(max_workers=attempts) as pool: + results = list(pool.map(attempt, range(attempts))) + + assert sum(results) == limit + assert len(store.list()) == limit + + +def test_delete_is_idempotent(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("p", MetaProfile(classifier_model="a", default_model="b")) + + store.delete("p") + assert store.list() == [] + # Deleting a missing meta-profile is a no-op. + store.delete("p") + + +def test_list_summaries_skips_corrupted(tmp_path: Path) -> None: + _write(tmp_path, "good", VALID) + (tmp_path / "broken.json").write_text("{not json", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + summaries = store.list_summaries() + + assert summaries == [ + { + "name": "good", + "classifier_model": "minimax", + "default_model": "gpt", + "num_classes": 2, + } + ] diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 9930c448f4..a93d2b0358 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -69,6 +69,8 @@ def test_llm_agent_settings_export_schema_groups_sections() -> None: "tools", "enable_sub_agents", "enable_switch_llm_tool", + "enable_classify_and_switch_llm_tool", + "active_meta_profile", "tool_concurrency_limit", "mcp_config", } @@ -342,6 +344,8 @@ def test_export_agent_settings_schema_emits_variant_tagged_sections() -> None: "tools", "enable_sub_agents", "enable_switch_llm_tool", + "enable_classify_and_switch_llm_tool", + "active_meta_profile", "tool_concurrency_limit", "mcp_config", } diff --git a/tests/sdk/tool/test_classify_and_switch_llm.py b/tests/sdk/tool/test_classify_and_switch_llm.py new file mode 100644 index 0000000000..1933d409d7 --- /dev/null +++ b/tests/sdk/tool/test_classify_and_switch_llm.py @@ -0,0 +1,422 @@ +import json +import types +from pathlib import Path + +import pytest + +from openhands.sdk import LLM, LocalConversation, OpenHandsAgentSettings +from openhands.sdk.agent import Agent +from openhands.sdk.llm import Message, TextContent, llm_profile_store +from openhands.sdk.llm.llm_profile_store import LLMProfileStore +from openhands.sdk.llm.meta_profile_store import MetaProfileStore +from openhands.sdk.testing import TestLLM +from openhands.sdk.tool.builtins import ( + ClassifyAndSwitchLLMAction, + ClassifyAndSwitchLLMExecutor, + ClassifyAndSwitchLLMObservation, + ClassifyAndSwitchLLMTool, +) +from openhands.sdk.tool.builtins.classify_and_switch_llm import ( + _recent_messages_text, + build_classifier_prompt, + parse_class_index, +) + + +META = { + "classifier_model": "classifier", + "default_model": "default", + "classes": [ + {"description": "UI / images", "model": "fast"}, + {"description": "tests", "model": "slow"}, + ], +} + + +def _make_llm(model: str, usage_id: str) -> LLM: + return TestLLM.from_messages([], model=model, usage_id=usage_id) + + +@pytest.fixture() +def meta_store(tmp_path: Path) -> MetaProfileStore: + meta_dir = tmp_path / "meta-profiles" + meta_dir.mkdir() + (meta_dir / "balanced.json").write_text(json.dumps(META), encoding="utf-8") + return MetaProfileStore(base_dir=meta_dir) + + +@pytest.fixture() +def profile_store(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> LLMProfileStore: + profile_dir = tmp_path / "profiles" + profile_dir.mkdir() + monkeypatch.setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_dir) + store = LLMProfileStore(base_dir=profile_dir) + store.save("fast", _make_llm("fast-model", "fast")) + store.save("slow", _make_llm("slow-model", "slow")) + store.save("default", _make_llm("default-profile-model", "default-profile")) + return store + + +def _make_conversation() -> LocalConversation: + return LocalConversation( + agent=Agent(llm=_make_llm("default-model", "default"), tools=[]), + workspace=Path.cwd(), + ) + + +def _patch_classifier( + conversation: LocalConversation, + monkeypatch: pytest.MonkeyPatch, + reply: str, +) -> None: + """Make the classifier profile resolve to a scripted TestLLM.""" + real_load = conversation._profile_store.load + + def fake_load(name: str, *, cipher=None): + if name == "classifier": + return TestLLM.from_messages( + [Message(role="assistant", content=[TextContent(text=reply)])], + usage_id="classifier", + ) + return real_load(name, cipher=cipher) + + monkeypatch.setattr(conversation._profile_store, "load", fake_load) + + +# ── pure helpers ────────────────────────────────────────────────────────── + + +def test_build_classifier_prompt_lists_categories() -> None: + from openhands.sdk.llm.meta_profile_store import MetaProfile + + prompt = build_classifier_prompt(MetaProfile.model_validate(META)) + assert "1. UI / images" in prompt + assert "2. tests" in prompt + assert "respond with 0" in prompt.lower() + + +@pytest.mark.parametrize( + "reply,expected", + [ + ("1", 1), + ("2", 2), + ("0", 0), + ("9", 0), # out of range -> default + ("none", 0), # no integer -> default + ("Category 2 fits best", 2), + ], +) +def test_parse_class_index(reply: str, expected: int) -> None: + assert parse_class_index(reply, num_classes=2) == expected + + +def test_recent_messages_text_takes_last_n() -> None: + from openhands.sdk.event.llm_convertible.message import MessageEvent + + events = [ + MessageEvent( + source="user", + llm_message=Message(role="user", content=[TextContent(text=f"m{i}")]), + ) + for i in range(8) + ] + conv = types.SimpleNamespace(state=types.SimpleNamespace(events=events)) + + text = _recent_messages_text(conv, limit=3) # type: ignore[arg-type] + assert text == "user: m5\nuser: m6\nuser: m7" + + +# ── create() ────────────────────────────────────────────────────────────── + + +def test_create_falls_back_to_first_when_no_active_profile( + meta_store: MetaProfileStore, +) -> None: + # "balanced" is the only profile, so it is the first one resolved. + # Resolution is deferred to invocation time, so check the resolver directly. + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor._resolve_meta_profile().classifier_model == "classifier" + + +def test_create_picks_alphabetically_first_meta_profile( + meta_store: MetaProfileStore, +) -> None: + # Add a second profile whose name sorts before "balanced". + other = dict(META) + other["classifier_model"] = "other-classifier" + (Path(meta_store.base_dir) / "aaa.json").write_text( + json.dumps(other), encoding="utf-8" + ) + assert meta_store.list()[0] == "aaa" + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor._resolve_meta_profile().classifier_model == "other-classifier" + + +def test_create_does_not_touch_disk_when_no_meta_profiles_exist( + tmp_path: Path, +) -> None: + # Deferred resolution: create() must not read the store, so an empty (or + # missing) meta-profile dir cannot break agent/conversation startup. + empty_store = MetaProfileStore(base_dir=tmp_path / "empty") + tools = ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store) + assert len(tools) == 1 + assert isinstance(tools[0].executor, ClassifyAndSwitchLLMExecutor) + + +def test_create_does_not_load_missing_active_meta_profile( + meta_store: MetaProfileStore, +) -> None: + # A dangling active_meta_profile must not raise at create() time. + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="does-not-exist", meta_profile_store=meta_store + )[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + + +def test_create_rejects_unknown_params(meta_store: MetaProfileStore) -> None: + with pytest.raises(ValueError): + ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", + meta_profile_store=meta_store, + bogus=1, + ) + + +def test_create_loads_meta_profile(meta_store: MetaProfileStore) -> None: + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", meta_profile_store=meta_store + )[0] + assert tool.name == "classify_and_switch_llm" + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor._resolve_meta_profile().classifier_model == "classifier" + + +# ── executor ────────────────────────────────────────────────────────────── + + +def _register_tool( + conversation: LocalConversation, meta_store: MetaProfileStore +) -> None: + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", meta_profile_store=meta_store + )[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + +def test_executor_switches_to_matched_class( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + assert obs.model == "fast" + assert obs.chosen_class == "UI / images" + assert conversation.agent.llm.model == "fast-model" + + +def test_classifier_call_is_accounted_in_conversation_stats( + profile_store, meta_store, monkeypatch +) -> None: + """The classifier completion must spend through the conversation registry. + + Regression for the budget-bypass bug: the classifier LLM has to be + registered in ``conversation.llm_registry`` (under a stable usage id) so its + tokens/cost land in ``conversation_stats`` and count against + ``max_budget_per_run`` — an unregistered LLM would spend off the books. + """ + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + usage_id = "classifier:classifier" + assert usage_id not in conversation.conversation_stats.usage_to_metrics + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + # The classifier LLM is now both registered and tracked by the stats, so + # its spend is included in the combined (budget-enforced) metrics. + assert usage_id in conversation.llm_registry.list_usage_ids() + assert usage_id in conversation.conversation_stats.usage_to_metrics + + +def test_repeated_routing_reuses_one_classifier_usage_bucket( + profile_store, meta_store, monkeypatch +) -> None: + """A second routing call must reuse the cached classifier, not re-register.""" + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + conversation.execute_tool("classify_and_switch_llm", ClassifyAndSwitchLLMAction()) + conversation.execute_tool("classify_and_switch_llm", ClassifyAndSwitchLLMAction()) + + usage_ids = conversation.llm_registry.list_usage_ids() + assert usage_ids.count("classifier:classifier") == 1 + + +def test_executor_falls_back_to_default_when_no_class( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="0") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + assert obs.model == "default" + assert obs.chosen_class is None + assert conversation.agent.llm.model == "default-profile-model" + + +def test_executor_errors_when_classifier_profile_missing( + profile_store, meta_store +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + # No "classifier" profile saved and no patch -> load fails. + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "classifier" in obs.text + assert conversation.agent.llm.model == "default-model" + + +def test_executor_errors_when_target_profile_missing( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + # classifier picks class 2 -> "slow", but remove it from the store. + (Path(profile_store.base_dir) / "slow.json").unlink() + _patch_classifier(conversation, monkeypatch, reply="2") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert obs.is_error + assert obs.model == "slow" + assert "not found" in obs.text + + +def test_executor_errors_when_no_meta_profile_available( + profile_store, tmp_path +) -> None: + # Empty store + no active profile: invocation (not startup) reports the error. + empty_store = MetaProfileStore(base_dir=tmp_path / "empty") + conversation = _make_conversation() + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store)[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "no meta-profile" in obs.text.lower() + assert conversation.agent.llm.model == "default-model" + + +def test_executor_errors_when_active_meta_profile_missing( + profile_store, meta_store +) -> None: + # Dangling active_meta_profile: invocation reports the error, no crash. + conversation = _make_conversation() + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="does-not-exist", meta_profile_store=meta_store + )[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "resolve" in obs.text.lower() + + +def test_missing_active_meta_profile_does_not_break_startup( + meta_store, monkeypatch +) -> None: + # The whole point of deferring the load: a missing active meta-profile must + # not break _ensure_agent_ready() / conversation startup. + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + active_meta_profile="does-not-exist", + ).create_agent() + conversation = LocalConversation(agent=agent, workspace=Path.cwd()) + + # Must not raise even though the active meta-profile file does not exist. + conversation._ensure_agent_ready() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + +# ── create_agent wiring ───────────────────────────────────────────────────── + + +def test_create_agent_adds_tool_when_enabled(meta_store, monkeypatch) -> None: + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + active_meta_profile="balanced", + ).create_agent() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + +def test_create_agent_adds_tool_when_enabled_without_active_profile( + meta_store, monkeypatch +) -> None: + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + # No active_meta_profile: the tool is still wired and resolves the first one. + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + ).create_agent() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + +def test_create_agent_omits_tool_when_disabled() -> None: + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + ).create_agent() + + assert not any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools)