From 1ced902e543e507b1e0bf6582adfafcd62960228 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 15 Jun 2026 22:59:26 -0300 Subject: [PATCH 1/8] Add intelligent model router (ClassifyAndSwitchLLM tool + meta-profiles) Introduce a meta-profile driven model router: - MetaProfile/MetaProfileStore (~/.openhands/meta-profiles) mirroring LLMProfileStore. - ClassifyAndSwitchLLM builtin tool: reads active meta-profile, runs one classifier call over recent conversation messages, and switches to the chosen LLM profile (falls back to default_model). - Wire tool through OpenHandsAgentSettings.create_agent via new enable_classify_and_switch_llm_tool + active_meta_profile fields. - Server parity (Option A): active_meta_profile in PersistedSettings, SettingsResponse/SettingsUpdateRequest, and settings_router; update() propagates it into agent_settings and toggles the enable flag. - Tests for store, helpers, executor paths, create_agent wiring, and server persistence/propagation. Co-authored-by: openhands --- .../agent_server/persistence/models.py | 21 ++ .../openhands/agent_server/settings_router.py | 6 +- openhands-sdk/openhands/sdk/llm/__init__.py | 8 + .../openhands/sdk/llm/meta_profile_store.py | 110 +++++++ .../openhands/sdk/settings/api_models.py | 9 + openhands-sdk/openhands/sdk/settings/model.py | 50 ++- .../openhands/sdk/tool/builtins/__init__.py | 11 + .../tool/builtins/classify_and_switch_llm.py | 291 ++++++++++++++++++ .../agent_server/test_active_meta_profile.py | 57 ++++ tests/agent_server/test_settings_router.py | 2 +- tests/sdk/llm/test_meta_profile_store.py | 82 +++++ tests/sdk/test_settings.py | 4 + .../sdk/tool/test_classify_and_switch_llm.py | 261 ++++++++++++++++ 13 files changed, 908 insertions(+), 4 deletions(-) create mode 100644 openhands-sdk/openhands/sdk/llm/meta_profile_store.py create mode 100644 openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py create mode 100644 tests/agent_server/test_active_meta_profile.py create mode 100644 tests/sdk/llm/test_meta_profile_store.py create mode 100644 tests/sdk/tool/test_classify_and_switch_llm.py diff --git a/openhands-agent-server/openhands/agent_server/persistence/models.py b/openhands-agent-server/openhands/agent_server/persistence/models.py index e5c1b6f714..87f7065539 100644 --- a/openhands-agent-server/openhands/agent_server/persistence/models.py +++ b/openhands-agent-server/openhands/agent_server/persistence/models.py @@ -55,6 +55,7 @@ class SettingsUpdatePayload(TypedDict, total=False): conversation_settings_diff: dict[str, Any] misc_settings_diff: dict[str, Any] active_profile: str | None + active_meta_profile: str | None def _deep_merge( @@ -140,6 +141,11 @@ class PersistedSettings(BaseModel): default=None, description="Name of the currently active LLM profile.", ) + active_meta_profile: str | None = Field( + default=None, + description="Name of the currently active meta-profile used for " + "intelligent model routing.", + ) misc_settings: dict[str, Any] = Field( default_factory=dict, description=( @@ -248,6 +254,21 @@ def update(self, payload: SettingsUpdatePayload) -> None: # Update active_profile if explicitly provided (including None to clear) if "active_profile" in payload: self.active_profile = payload["active_profile"] + + # Update active_meta_profile and propagate it into agent_settings so + # the agent built from these settings enables and uses the routing + # tool (mirrors how activating a profile bakes the LLM into + # agent_settings). ACP agent settings lack these fields, so guard. + if "active_meta_profile" in payload: + name = payload["active_meta_profile"] + self.active_meta_profile = name + if "active_meta_profile" in type(self.agent_settings).model_fields: + self.agent_settings = self.agent_settings.model_copy( + update={ + "active_meta_profile": name, + "enable_classify_and_switch_llm_tool": name is not None, + } + ) finally: # Clear conv_merged to minimize plaintext exposure window if conv_merged is not None: diff --git a/openhands-agent-server/openhands/agent_server/settings_router.py b/openhands-agent-server/openhands/agent_server/settings_router.py index 5c3b410a58..4876cd2bf4 100644 --- a/openhands-agent-server/openhands/agent_server/settings_router.py +++ b/openhands-agent-server/openhands/agent_server/settings_router.py @@ -161,6 +161,7 @@ async def get_settings(request: Request) -> SettingsResponse: ), llm_api_key_is_set=settings.llm_api_key_is_set, active_profile=settings.active_profile, + active_meta_profile=settings.active_meta_profile, misc_settings=settings.misc_settings, ) @@ -206,6 +207,8 @@ async def update_settings( update_data = payload.model_dump(exclude_none=True) if "active_profile" in payload.model_fields_set: update_data["active_profile"] = payload.active_profile + if "active_meta_profile" in payload.model_fields_set: + update_data["active_meta_profile"] = payload.active_meta_profile if not update_data: # No updates provided - this is a client error raise HTTPException( @@ -213,7 +216,7 @@ async def update_settings( detail=( "At least one of agent_settings_diff, " "conversation_settings_diff, misc_settings_diff, " - "or active_profile must be provided" + "active_profile, or active_meta_profile must be provided" ), ) @@ -269,6 +272,7 @@ def apply_update(settings: PersistedSettings) -> PersistedSettings: conversation_settings=settings.conversation_settings.model_dump(mode="json"), llm_api_key_is_set=settings.llm_api_key_is_set, active_profile=settings.active_profile, + active_meta_profile=settings.active_meta_profile, misc_settings=settings.misc_settings, ) diff --git a/openhands-sdk/openhands/sdk/llm/__init__.py b/openhands-sdk/openhands/sdk/llm/__init__.py index 1a823532a9..26bd6a158a 100644 --- a/openhands-sdk/openhands/sdk/llm/__init__.py +++ b/openhands-sdk/openhands/sdk/llm/__init__.py @@ -19,6 +19,11 @@ ThinkingBlock, content_to_str, ) +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileClass, + MetaProfileStore, +) from openhands.sdk.llm.router import RouterLLM from openhands.sdk.llm.streaming import ( AsyncTokenCallbackType, @@ -46,6 +51,9 @@ "LLM_PROFILE_SCHEMA_VERSION", "LLMRegistry", "LLMProfileStore", + "MetaProfile", + "MetaProfileClass", + "MetaProfileStore", "RouterLLM", "RegistryEvent", # Messages diff --git a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py new file mode 100644 index 0000000000..1d4082f27d --- /dev/null +++ b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py @@ -0,0 +1,110 @@ +"""Storage for *meta-profiles*: declarative model-routing configurations. + +A meta-profile describes how to pick an LLM for a task. It names a +``classifier_model`` used to categorize the task, a ``default_model`` to fall +back to, and a list of ``classes`` mapping a natural-language task description +to the model that should handle it. + +Every model reference (``classifier_model``, ``default_model`` and each +class's ``model``) is the *name of a saved LLM profile* in +:class:`~openhands.sdk.llm.llm_profile_store.LLMProfileStore`, so credentials +and provider settings are resolved through the existing profile machinery. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Final + +from pydantic import BaseModel, Field + +from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_REGEX +from openhands.sdk.logger import get_logger + + +_DEFAULT_META_PROFILE_DIR: Final[Path] = Path.home() / ".openhands" / "meta-profiles" + +logger = get_logger(__name__) + + +class MetaProfileClass(BaseModel): + """A single task category and the LLM profile that should handle it.""" + + description: str = Field( + description="Natural-language description of the kind of task this " + "class covers (e.g. 'task is UI oriented or requires looking at images')." + ) + model: str = Field( + description="Name of the saved LLM profile to switch to for tasks " + "matching this class." + ) + + +class MetaProfile(BaseModel): + """A declarative model-routing configuration.""" + + classifier_model: str = Field( + description="Name of the saved LLM profile used to classify the task." + ) + default_model: str = Field( + description="Name of the saved LLM profile to use when no class matches." + ) + classes: list[MetaProfileClass] = Field( + default_factory=list, + description="Ordered list of task classes and their target profiles.", + ) + + +class MetaProfileStore: + """Read meta-profiles from ``~/.openhands/meta-profiles`` (by default).""" + + def __init__(self, base_dir: Path | str | None = None) -> None: + """Initialize the meta-profile store. + + Args: + base_dir: Directory where meta-profiles are stored. Defaults to + ``~/.openhands/meta-profiles`` when ``None``. + """ + self.base_dir = ( + Path(base_dir) if base_dir is not None else _DEFAULT_META_PROFILE_DIR + ) + self.base_dir.mkdir(parents=True, exist_ok=True) + + def list(self) -> list[str]: + """Return the names (without ``.json``) of all stored meta-profiles.""" + return sorted( + p.stem + for p in self.base_dir.glob("*.json") + if PROFILE_NAME_REGEX.match(p.stem) + ) + + def _get_path(self, name: str) -> Path: + clean_name = name.removesuffix(".json") + if not PROFILE_NAME_REGEX.match(clean_name): + raise ValueError( + f"Invalid meta-profile name: {name!r}. " + "Names must be 1-64 characters, start with a letter or digit, " + "and contain only letters, digits, '.', '_', or '-'." + ) + return self.base_dir / f"{clean_name}.json" + + def load(self, name: str) -> MetaProfile: + """Load a meta-profile by name. + + Raises: + FileNotFoundError: If the meta-profile does not exist. + ValueError: If the file is corrupted or fails validation. + """ + path = self._get_path(name) + if not path.exists(): + existing = self.list() + raise FileNotFoundError( + f"Meta-profile `{name}` not found. " + f"Available meta-profiles: {', '.join(existing) or 'none'}" + ) + try: + data = json.loads(path.read_text(encoding="utf-8")) + return MetaProfile.model_validate(data) + except Exception as e: + raise ValueError(f"Failed to load meta-profile `{name}`: {e}") from e diff --git a/openhands-sdk/openhands/sdk/settings/api_models.py b/openhands-sdk/openhands/sdk/settings/api_models.py index 52504941fe..247c96aee2 100644 --- a/openhands-sdk/openhands/sdk/settings/api_models.py +++ b/openhands-sdk/openhands/sdk/settings/api_models.py @@ -69,6 +69,10 @@ class SettingsResponse(BaseModel): default=None, description="Name of the currently active LLM profile, if one is selected.", ) + active_meta_profile: str | None = Field( + default=None, + description="Name of the currently active meta-profile, if one is selected.", + ) misc_settings: dict[str, Any] = Field(default_factory=dict) def get_agent_settings(self) -> AgentSettingsConfig: @@ -113,6 +117,11 @@ class SettingsUpdateRequest(BaseModel): pattern=PROFILE_NAME_PATTERN, description="Name of the active LLM profile to persist; null clears it.", ) + active_meta_profile: str | None = Field( + default=None, + pattern=PROFILE_NAME_PATTERN, + description="Name of the active meta-profile to persist; null clears it.", + ) # ── Secrets API Models ──────────────────────────────────────────────────── diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 624aa9402e..44b60c76f9 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1025,6 +1025,35 @@ class OpenHandsAgentSettings(AgentSettingsBase): ).model_dump() }, ) + enable_classify_and_switch_llm_tool: bool = Field( + default=False, + description=( + "Enable the built-in classify_and_switch_llm tool, which routes the " + "task to the best LLM profile using the active meta-profile. Requires " + "active_meta_profile to be set." + ), + json_schema_extra={ + SETTINGS_METADATA_KEY: SettingsFieldMetadata( + label="Enable intelligent model routing tool", + prominence=SettingProminence.MINOR, + variant="openhands", + ).model_dump() + }, + ) + active_meta_profile: str | None = Field( + default=None, + description=( + "Name of the active meta-profile (in ~/.openhands/meta-profiles) used " + "by the classify_and_switch_llm tool to route tasks to LLM profiles." + ), + json_schema_extra={ + SETTINGS_METADATA_KEY: SettingsFieldMetadata( + label="Active meta-profile", + prominence=SettingProminence.MINOR, + variant="openhands", + ).model_dump() + }, + ) tool_concurrency_limit: int = Field( default=1, ge=1, @@ -1119,7 +1148,12 @@ def create_agent(self) -> Agent: """ from openhands.sdk.agent import Agent from openhands.sdk.llm.auth.openai import create_subscription_llm_from_config - from openhands.sdk.tool.builtins import BUILT_IN_TOOLS, SwitchLLMTool + from openhands.sdk.tool import Tool + from openhands.sdk.tool.builtins import ( + BUILT_IN_TOOLS, + ClassifyAndSwitchLLMTool, + SwitchLLMTool, + ) # Bypass ``_serialize_mcp_config``: MCP servers need real env/headers. mcp_config = ( @@ -1131,11 +1165,23 @@ def create_agent(self) -> Agent: if self.enable_switch_llm_tool: include_default_tools.append(SwitchLLMTool.__name__) + # The routing tool needs the active meta-profile name, which the + # name-only ``include_default_tools`` path cannot pass, so add it as a + # ``Tool`` spec carrying the param. + tools = list(self.tools) + if self.enable_classify_and_switch_llm_tool and self.active_meta_profile: + tools.append( + Tool( + name=ClassifyAndSwitchLLMTool.__name__, + params={"active_meta_profile": self.active_meta_profile}, + ) + ) + llm = create_subscription_llm_from_config(self.llm) condenser = None if llm.is_subscription else self.build_condenser(llm) return Agent( llm=llm, - tools=self.tools, + tools=tools, mcp_config=mcp_config, include_default_tools=include_default_tools, agent_context=self.agent_context, diff --git a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py index 15dbf75e67..abc6e12f6e 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/__init__.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/__init__.py @@ -5,6 +5,12 @@ For tools that require interacting with the environment, add them to `openhands-tools`. """ +from openhands.sdk.tool.builtins.classify_and_switch_llm import ( + ClassifyAndSwitchLLMAction, + ClassifyAndSwitchLLMExecutor, + ClassifyAndSwitchLLMObservation, + ClassifyAndSwitchLLMTool, +) from openhands.sdk.tool.builtins.finish import ( FinishAction, FinishExecutor, @@ -43,11 +49,16 @@ **{tool.__name__: tool for tool in BUILT_IN_TOOLS}, InvokeSkillTool.__name__: InvokeSkillTool, SwitchLLMTool.__name__: SwitchLLMTool, + ClassifyAndSwitchLLMTool.__name__: ClassifyAndSwitchLLMTool, } __all__ = [ "BUILT_IN_TOOLS", "BUILT_IN_TOOL_CLASSES", + "ClassifyAndSwitchLLMTool", + "ClassifyAndSwitchLLMAction", + "ClassifyAndSwitchLLMObservation", + "ClassifyAndSwitchLLMExecutor", "FinishTool", "FinishAction", "FinishObservation", diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py new file mode 100644 index 0000000000..41929a7a3b --- /dev/null +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -0,0 +1,291 @@ +"""Built-in tool that classifies the current task and switches LLM profile. + +The tool reads the *active meta-profile* (see +:class:`~openhands.sdk.llm.meta_profile_store.MetaProfile`), asks the +meta-profile's ``classifier_model`` to categorize the current task using the +last few conversation messages, and switches the conversation to the LLM +profile mapped to the chosen class (falling back to ``default_model``). +""" + +import re +from collections.abc import Sequence +from typing import TYPE_CHECKING, Self + +from pydantic import Field +from rich.text import Text + +from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore +from openhands.sdk.logger import get_logger +from openhands.sdk.tool.registry import register_tool +from openhands.sdk.tool.tool import ( + Action, + Observation, + ToolAnnotations, + ToolDefinition, + ToolExecutor, +) + + +if TYPE_CHECKING: + from openhands.sdk.conversation.impl.local_conversation import LocalConversation + from openhands.sdk.conversation.state import ConversationState + + +logger = get_logger(__name__) + +# Number of trailing conversation messages shown to the classifier. +_RECENT_MESSAGE_LIMIT = 6 + + +class ClassifyAndSwitchLLMAction(Action): + """Trigger classification of the current task and switch the LLM profile.""" + + @property + def visualize(self) -> Text: + content = Text() + content.append("Classify task and switch LLM", style="bold magenta") + return content + + +class ClassifyAndSwitchLLMObservation(Observation): + """Result of classifying the task and switching the LLM profile.""" + + chosen_class: str | None = Field( + default=None, + description="Description of the matched class, or None when the " + "default model was used.", + ) + model: str | None = Field( + default=None, + description="Name of the LLM profile that was activated.", + ) + active_model: str | None = Field( + default=None, + description="Model configured by the activated profile, when available.", + ) + + @property + def visualize(self) -> Text: + content = Text() + if self.is_error: + content.append("Failed to classify and switch LLM", style="bold red") + else: + content.append("Classified task and switched LLM", style="bold green") + if self.model: + content.append(f": {self.model}") + if self.active_model: + content.append(f" ({self.active_model})") + if self.chosen_class: + content.append("\nMatched class: ", style="bold") + content.append(self.chosen_class) + return content + + +def build_classifier_prompt(meta: MetaProfile) -> str: + """Build the classifier system prompt listing the meta-profile classes.""" + lines = [ + "You are a model-routing classifier. Based on the recent conversation, " + "pick the single category that best describes the current task.", + "", + "Categories:", + ] + for i, cls in enumerate(meta.classes, start=1): + lines.append(f"{i}. {cls.description}") + lines.append("") + lines.append( + "Respond with ONLY the number of the best matching category. " + "If none of the categories clearly apply, respond with 0." + ) + return "\n".join(lines) + + +def parse_class_index(text: str, num_classes: int) -> int: + """Parse the classifier reply into a class index. + + Returns 0 (use ``default_model``) when no in-range integer is found. + """ + match = re.search(r"-?\d+", text) + if match is None: + return 0 + index = int(match.group()) + if 1 <= index <= num_classes: + return index + return 0 + + +def _recent_messages_text( + conversation: "LocalConversation", limit: int = _RECENT_MESSAGE_LIMIT +) -> str: + """Return a transcript of the last ``limit`` conversation messages.""" + from openhands.sdk.event.llm_convertible.message import MessageEvent + from openhands.sdk.llm import content_to_str + + messages: list[str] = [] + for event in conversation.state.events: + if not isinstance(event, MessageEvent): + continue + text = "\n".join(content_to_str(event.llm_message.content)).strip() + if text: + messages.append(f"{event.source}: {text}") + return "\n".join(messages[-limit:]) + + +class ClassifyAndSwitchLLMExecutor(ToolExecutor): + def __init__(self, meta_profile: MetaProfile) -> None: + self.meta_profile = meta_profile + + def __call__( + self, + action: ClassifyAndSwitchLLMAction, # noqa: ARG002 + conversation: "LocalConversation | None" = None, + ) -> ClassifyAndSwitchLLMObservation: + from openhands.sdk.llm import Message, TextContent, content_to_str + + if conversation is None: + return ClassifyAndSwitchLLMObservation.from_text( + text="Cannot classify and switch LLM without an active conversation.", + is_error=True, + ) + + meta = self.meta_profile + # 1) Load the classifier LLM (a saved profile), respecting at-rest cipher. + try: + classifier_llm = conversation._profile_store.load( + meta.classifier_model, cipher=conversation._cipher + ) + except (FileNotFoundError, ValueError) as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Failed to load classifier profile " + f"'{meta.classifier_model}': {exc}" + ), + is_error=True, + ) + + # 2) Single classifier call over the recent conversation. + transcript = _recent_messages_text(conversation) or "(no messages yet)" + messages = [ + Message( + role="system", + content=[TextContent(text=build_classifier_prompt(meta))], + ), + Message( + role="user", + content=[ + TextContent( + text=f"Recent conversation:\n{transcript}\n\nCategory number:" + ) + ], + ), + ] + try: + response = classifier_llm.completion(messages) + except Exception as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Classifier call failed: {type(exc).__name__}: {exc}", + is_error=True, + ) + + reply = "\n".join(content_to_str(response.message.content)) + index = parse_class_index(reply, len(meta.classes)) + + # 3) Resolve the target profile (chosen class or default). + if index == 0: + target_profile = meta.default_model + chosen_class = None + else: + chosen = meta.classes[index - 1] + target_profile = chosen.model + chosen_class = chosen.description + + # 4) Switch the conversation to the target profile. + try: + conversation.switch_profile(target_profile) + except FileNotFoundError: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Target LLM profile '{target_profile}' was not found.", + is_error=True, + model=target_profile, + chosen_class=chosen_class, + ) + except Exception as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Failed to switch to LLM profile '{target_profile}': " + f"{type(exc).__name__}: {exc}" + ), + is_error=True, + model=target_profile, + chosen_class=chosen_class, + ) + + active_model = conversation.agent.llm.model + label = chosen_class or "default" + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Classified task as '{label}' and switched to LLM profile " + f"'{target_profile}' (model '{active_model}'). " + "Future agent steps will use this profile." + ), + chosen_class=chosen_class, + model=target_profile, + active_model=active_model, + ) + + +_DESCRIPTION = ( + "Classify the current task and switch this conversation to the most " + "suitable saved LLM profile.\n\n" + "Use this near the start of a task (or when the task changes) to route " + "the work to the best model. A classifier model inspects the recent " + "conversation and picks a category from the active meta-profile; the " + "conversation then switches to that category's LLM profile. The switch " + "takes effect on the next LLM call." +) + + +class ClassifyAndSwitchLLMTool( + ToolDefinition[ClassifyAndSwitchLLMAction, ClassifyAndSwitchLLMObservation] +): + """Tool that classifies the task and switches to a meta-profile's LLM.""" + + @classmethod + def create( + cls, + conv_state: "ConversationState | None" = None, # noqa: ARG003 + active_meta_profile: str | None = None, + meta_profile_store: MetaProfileStore | None = None, + **params, + ) -> Sequence[Self]: + if params: + raise ValueError( + "ClassifyAndSwitchLLMTool only accepts 'active_meta_profile' " + "and 'meta_profile_store'." + ) + if not active_meta_profile: + raise ValueError( + "ClassifyAndSwitchLLMTool requires an 'active_meta_profile' name." + ) + + store = meta_profile_store or MetaProfileStore() + meta = store.load(active_meta_profile) + return [ + cls( + description=_DESCRIPTION, + action_type=ClassifyAndSwitchLLMAction, + observation_type=ClassifyAndSwitchLLMObservation, + executor=ClassifyAndSwitchLLMExecutor(meta), + annotations=ToolAnnotations( + readOnlyHint=False, + destructiveHint=False, + idempotentHint=False, + openWorldHint=False, + ), + ) + ] + + +# Registered so it can be resolved from a ``Tool`` spec carrying the +# ``active_meta_profile`` param (the built-in ``include_default_tools`` path +# cannot pass params). +register_tool(ClassifyAndSwitchLLMTool.__name__, ClassifyAndSwitchLLMTool) diff --git a/tests/agent_server/test_active_meta_profile.py b/tests/agent_server/test_active_meta_profile.py new file mode 100644 index 0000000000..3631cc996c --- /dev/null +++ b/tests/agent_server/test_active_meta_profile.py @@ -0,0 +1,57 @@ +"""Tests for active_meta_profile persistence and propagation (Option A).""" + +from openhands.agent_server.persistence.models import PersistedSettings +from openhands.sdk.settings.model import OpenHandsAgentSettings + + +def test_update_sets_top_level_and_propagates_into_agent_settings() -> None: + settings = PersistedSettings() + + settings.update({"active_meta_profile": "balanced"}) + + assert settings.active_meta_profile == "balanced" + # OpenHandsAgentSettings (default) gains the routing tool config. + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + assert agent.enable_classify_and_switch_llm_tool is True + + +def test_update_clearing_disables_tool() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + settings.update({"active_meta_profile": None}) + + assert settings.active_meta_profile is None + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile is None + assert agent.enable_classify_and_switch_llm_tool is False + + +def test_update_without_active_meta_profile_leaves_it_unchanged() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + # An unrelated update must not reset the meta-profile. + settings.update({"active_profile": "fast"}) + + assert settings.active_meta_profile == "balanced" + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + + +def test_active_meta_profile_round_trips_through_serialization() -> None: + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + + dumped = settings.model_dump(mode="json") + assert dumped["active_meta_profile"] == "balanced" + + reloaded = PersistedSettings.from_persisted(dumped) + assert reloaded.active_meta_profile == "balanced" + agent = reloaded.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" diff --git a/tests/agent_server/test_settings_router.py b/tests/agent_server/test_settings_router.py index 01656eb3e2..820856707a 100644 --- a/tests/agent_server/test_settings_router.py +++ b/tests/agent_server/test_settings_router.py @@ -642,7 +642,7 @@ def test_patch_settings_empty_payload_returns_400(client_with_settings): assert response.json()["detail"] == ( "At least one of agent_settings_diff, " "conversation_settings_diff, misc_settings_diff, " - "or active_profile must be provided" + "active_profile, or active_meta_profile must be provided" ) diff --git a/tests/sdk/llm/test_meta_profile_store.py b/tests/sdk/llm/test_meta_profile_store.py new file mode 100644 index 0000000000..ef81c0fb87 --- /dev/null +++ b/tests/sdk/llm/test_meta_profile_store.py @@ -0,0 +1,82 @@ +import json +from pathlib import Path + +import pytest + +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileStore, +) + + +def _write(base: Path, name: str, data: dict) -> None: + (base / f"{name}.json").write_text(json.dumps(data), encoding="utf-8") + + +VALID = { + "classifier_model": "minimax", + "default_model": "gpt", + "classes": [ + {"description": "UI / images", "model": "deepseek"}, + {"description": "research", "model": "gemini"}, + ], +} + + +def test_load_valid_meta_profile(tmp_path: Path) -> None: + _write(tmp_path, "balanced", VALID) + store = MetaProfileStore(base_dir=tmp_path) + + meta = store.load("balanced") + + assert isinstance(meta, MetaProfile) + assert meta.classifier_model == "minimax" + assert meta.default_model == "gpt" + assert [c.model for c in meta.classes] == ["deepseek", "gemini"] + + +def test_load_accepts_name_with_json_suffix(tmp_path: Path) -> None: + _write(tmp_path, "balanced", VALID) + store = MetaProfileStore(base_dir=tmp_path) + + assert store.load("balanced.json").classifier_model == "minimax" + + +def test_list_returns_sorted_valid_names(tmp_path: Path) -> None: + _write(tmp_path, "b", VALID) + _write(tmp_path, "a", VALID) + # A file with an invalid stem must be ignored by list(). + (tmp_path / ".hidden.json").write_text("{}", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + assert store.list() == ["a", "b"] + + +def test_load_missing_raises_file_not_found(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(FileNotFoundError): + store.load("nope") + + +def test_load_invalid_name_raises_value_error(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("../escape") + + +def test_load_corrupted_json_raises_value_error(tmp_path: Path) -> None: + (tmp_path / "broken.json").write_text("{not json", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("broken") + + +def test_load_schema_violation_raises_value_error(tmp_path: Path) -> None: + _write(tmp_path, "bad", {"default_model": "gpt"}) # missing classifier_model + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.load("bad") diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 9930c448f4..a93d2b0358 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -69,6 +69,8 @@ def test_llm_agent_settings_export_schema_groups_sections() -> None: "tools", "enable_sub_agents", "enable_switch_llm_tool", + "enable_classify_and_switch_llm_tool", + "active_meta_profile", "tool_concurrency_limit", "mcp_config", } @@ -342,6 +344,8 @@ def test_export_agent_settings_schema_emits_variant_tagged_sections() -> None: "tools", "enable_sub_agents", "enable_switch_llm_tool", + "enable_classify_and_switch_llm_tool", + "active_meta_profile", "tool_concurrency_limit", "mcp_config", } diff --git a/tests/sdk/tool/test_classify_and_switch_llm.py b/tests/sdk/tool/test_classify_and_switch_llm.py new file mode 100644 index 0000000000..6f6582eeff --- /dev/null +++ b/tests/sdk/tool/test_classify_and_switch_llm.py @@ -0,0 +1,261 @@ +import json +import types +from pathlib import Path + +import pytest + +from openhands.sdk import LLM, LocalConversation, OpenHandsAgentSettings +from openhands.sdk.agent import Agent +from openhands.sdk.llm import Message, TextContent, llm_profile_store +from openhands.sdk.llm.llm_profile_store import LLMProfileStore +from openhands.sdk.llm.meta_profile_store import MetaProfileStore +from openhands.sdk.testing import TestLLM +from openhands.sdk.tool.builtins import ( + ClassifyAndSwitchLLMAction, + ClassifyAndSwitchLLMExecutor, + ClassifyAndSwitchLLMObservation, + ClassifyAndSwitchLLMTool, +) +from openhands.sdk.tool.builtins.classify_and_switch_llm import ( + _recent_messages_text, + build_classifier_prompt, + parse_class_index, +) + + +META = { + "classifier_model": "classifier", + "default_model": "default", + "classes": [ + {"description": "UI / images", "model": "fast"}, + {"description": "tests", "model": "slow"}, + ], +} + + +def _make_llm(model: str, usage_id: str) -> LLM: + return TestLLM.from_messages([], model=model, usage_id=usage_id) + + +@pytest.fixture() +def meta_store(tmp_path: Path) -> MetaProfileStore: + meta_dir = tmp_path / "meta-profiles" + meta_dir.mkdir() + (meta_dir / "balanced.json").write_text(json.dumps(META), encoding="utf-8") + return MetaProfileStore(base_dir=meta_dir) + + +@pytest.fixture() +def profile_store(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> LLMProfileStore: + profile_dir = tmp_path / "profiles" + profile_dir.mkdir() + monkeypatch.setattr(llm_profile_store, "_DEFAULT_PROFILE_DIR", profile_dir) + store = LLMProfileStore(base_dir=profile_dir) + store.save("fast", _make_llm("fast-model", "fast")) + store.save("slow", _make_llm("slow-model", "slow")) + store.save("default", _make_llm("default-profile-model", "default-profile")) + return store + + +def _make_conversation() -> LocalConversation: + return LocalConversation( + agent=Agent(llm=_make_llm("default-model", "default"), tools=[]), + workspace=Path.cwd(), + ) + + +def _patch_classifier( + conversation: LocalConversation, + monkeypatch: pytest.MonkeyPatch, + reply: str, +) -> None: + """Make the classifier profile resolve to a scripted TestLLM.""" + real_load = conversation._profile_store.load + + def fake_load(name: str, *, cipher=None): + if name == "classifier": + return TestLLM.from_messages( + [Message(role="assistant", content=[TextContent(text=reply)])], + usage_id="classifier", + ) + return real_load(name, cipher=cipher) + + monkeypatch.setattr(conversation._profile_store, "load", fake_load) + + +# ── pure helpers ────────────────────────────────────────────────────────── + + +def test_build_classifier_prompt_lists_categories() -> None: + from openhands.sdk.llm.meta_profile_store import MetaProfile + + prompt = build_classifier_prompt(MetaProfile.model_validate(META)) + assert "1. UI / images" in prompt + assert "2. tests" in prompt + assert "respond with 0" in prompt.lower() + + +@pytest.mark.parametrize( + "reply,expected", + [ + ("1", 1), + ("2", 2), + ("0", 0), + ("9", 0), # out of range -> default + ("none", 0), # no integer -> default + ("Category 2 fits best", 2), + ], +) +def test_parse_class_index(reply: str, expected: int) -> None: + assert parse_class_index(reply, num_classes=2) == expected + + +def test_recent_messages_text_takes_last_n() -> None: + from openhands.sdk.event.llm_convertible.message import MessageEvent + + events = [ + MessageEvent( + source="user", + llm_message=Message(role="user", content=[TextContent(text=f"m{i}")]), + ) + for i in range(8) + ] + conv = types.SimpleNamespace(state=types.SimpleNamespace(events=events)) + + text = _recent_messages_text(conv, limit=3) # type: ignore[arg-type] + assert text == "user: m5\nuser: m6\nuser: m7" + + +# ── create() ────────────────────────────────────────────────────────────── + + +def test_create_requires_active_meta_profile(meta_store: MetaProfileStore) -> None: + with pytest.raises(ValueError): + ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store) + + +def test_create_rejects_unknown_params(meta_store: MetaProfileStore) -> None: + with pytest.raises(ValueError): + ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", + meta_profile_store=meta_store, + bogus=1, + ) + + +def test_create_loads_meta_profile(meta_store: MetaProfileStore) -> None: + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", meta_profile_store=meta_store + )[0] + assert tool.name == "classify_and_switch_llm" + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor.meta_profile.classifier_model == "classifier" + + +# ── executor ────────────────────────────────────────────────────────────── + + +def _register_tool( + conversation: LocalConversation, meta_store: MetaProfileStore +) -> None: + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="balanced", meta_profile_store=meta_store + )[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + +def test_executor_switches_to_matched_class( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + assert obs.model == "fast" + assert obs.chosen_class == "UI / images" + assert conversation.agent.llm.model == "fast-model" + + +def test_executor_falls_back_to_default_when_no_class( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="0") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + assert obs.model == "default" + assert obs.chosen_class is None + assert conversation.agent.llm.model == "default-profile-model" + + +def test_executor_errors_when_classifier_profile_missing( + profile_store, meta_store +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + # No "classifier" profile saved and no patch -> load fails. + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "classifier" in obs.text + assert conversation.agent.llm.model == "default-model" + + +def test_executor_errors_when_target_profile_missing( + profile_store, meta_store, monkeypatch +) -> None: + conversation = _make_conversation() + _register_tool(conversation, meta_store) + # classifier picks class 2 -> "slow", but remove it from the store. + (Path(profile_store.base_dir) / "slow.json").unlink() + _patch_classifier(conversation, monkeypatch, reply="2") + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert obs.is_error + assert obs.model == "slow" + assert "not found" in obs.text + + +# ── create_agent wiring ───────────────────────────────────────────────────── + + +def test_create_agent_adds_tool_when_enabled(meta_store, monkeypatch) -> None: + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + active_meta_profile="balanced", + ).create_agent() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + +def test_create_agent_omits_tool_when_disabled() -> None: + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + ).create_agent() + + assert not any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) From 4907cf89c3796d3c9567ab2fcfaf96d50ee46fab Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 16 Jun 2026 01:38:29 -0300 Subject: [PATCH 2/8] Fall back to first meta-profile when none is active When the classify_and_switch_llm tool is enabled without an active meta-profile, resolve the first available meta-profile from the store instead of requiring one. Wire the tool in create_agent() whenever it is enabled, and only error if no meta-profiles exist at all. Co-authored-by: openhands --- openhands-sdk/openhands/sdk/settings/model.py | 19 ++++---- .../tool/builtins/classify_and_switch_llm.py | 20 ++++++--- .../sdk/tool/test_classify_and_switch_llm.py | 45 ++++++++++++++++++- 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 44b60c76f9..b3da4fd23d 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -1029,8 +1029,8 @@ class OpenHandsAgentSettings(AgentSettingsBase): default=False, description=( "Enable the built-in classify_and_switch_llm tool, which routes the " - "task to the best LLM profile using the active meta-profile. Requires " - "active_meta_profile to be set." + "task to the best LLM profile using the active meta-profile. When no " + "active_meta_profile is set, the first available meta-profile is used." ), json_schema_extra={ SETTINGS_METADATA_KEY: SettingsFieldMetadata( @@ -1167,15 +1167,16 @@ def create_agent(self) -> Agent: # The routing tool needs the active meta-profile name, which the # name-only ``include_default_tools`` path cannot pass, so add it as a - # ``Tool`` spec carrying the param. + # ``Tool`` spec carrying the param. When no meta-profile is active, the + # tool falls back to the first available one, so we still wire it. tools = list(self.tools) - if self.enable_classify_and_switch_llm_tool and self.active_meta_profile: - tools.append( - Tool( - name=ClassifyAndSwitchLLMTool.__name__, - params={"active_meta_profile": self.active_meta_profile}, - ) + if self.enable_classify_and_switch_llm_tool: + params = ( + {"active_meta_profile": self.active_meta_profile} + if self.active_meta_profile + else {} ) + tools.append(Tool(name=ClassifyAndSwitchLLMTool.__name__, params=params)) llm = create_subscription_llm_from_config(self.llm) condenser = None if llm.is_subscription else self.build_condenser(llm) diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py index 41929a7a3b..72895569c6 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -262,13 +262,23 @@ def create( "ClassifyAndSwitchLLMTool only accepts 'active_meta_profile' " "and 'meta_profile_store'." ) - if not active_meta_profile: - raise ValueError( - "ClassifyAndSwitchLLMTool requires an 'active_meta_profile' name." - ) store = meta_profile_store or MetaProfileStore() - meta = store.load(active_meta_profile) + name = active_meta_profile + if not name: + # No meta-profile is active: fall back to the first available one. + available = store.list() + if not available: + raise ValueError( + "ClassifyAndSwitchLLMTool requires at least one meta-profile, " + "but none were found in the meta-profile store." + ) + name = available[0] + logger.info( + "No active meta-profile set; falling back to first available: %r", + name, + ) + meta = store.load(name) return [ cls( description=_DESCRIPTION, diff --git a/tests/sdk/tool/test_classify_and_switch_llm.py b/tests/sdk/tool/test_classify_and_switch_llm.py index 6f6582eeff..301b6acd27 100644 --- a/tests/sdk/tool/test_classify_and_switch_llm.py +++ b/tests/sdk/tool/test_classify_and_switch_llm.py @@ -129,9 +129,34 @@ def test_recent_messages_text_takes_last_n() -> None: # ── create() ────────────────────────────────────────────────────────────── -def test_create_requires_active_meta_profile(meta_store: MetaProfileStore) -> None: +def test_create_falls_back_to_first_when_no_active_profile( + meta_store: MetaProfileStore, +) -> None: + # "balanced" is the only profile, so it is the first one found. + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor.meta_profile.classifier_model == "classifier" + + +def test_create_picks_alphabetically_first_meta_profile( + meta_store: MetaProfileStore, +) -> None: + # Add a second profile whose name sorts before "balanced". + other = dict(META) + other["classifier_model"] = "other-classifier" + (Path(meta_store.base_dir) / "aaa.json").write_text( + json.dumps(other), encoding="utf-8" + ) + assert meta_store.list()[0] == "aaa" + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) + assert tool.executor.meta_profile.classifier_model == "other-classifier" + + +def test_create_raises_when_no_meta_profiles_exist(tmp_path: Path) -> None: + empty_store = MetaProfileStore(base_dir=tmp_path / "empty") with pytest.raises(ValueError): - ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store) + ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store) def test_create_rejects_unknown_params(meta_store: MetaProfileStore) -> None: @@ -253,6 +278,22 @@ def test_create_agent_adds_tool_when_enabled(meta_store, monkeypatch) -> None: assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) +def test_create_agent_adds_tool_when_enabled_without_active_profile( + meta_store, monkeypatch +) -> None: + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + # No active_meta_profile: the tool is still wired and resolves the first one. + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + ).create_agent() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + def test_create_agent_omits_tool_when_disabled() -> None: agent = OpenHandsAgentSettings( llm=_make_llm("default-model", "default"), From 87b3830da00e7a644a08cd748a4f5b43b9c1bf36 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 16 Jun 2026 15:22:54 -0300 Subject: [PATCH 3/8] Add meta-profile CRUD endpoints (/api/meta-profiles) Adds MetaProfileStore.save()/delete()/list_summaries() plus a meta_profiles_router mirroring profiles_router: list, get, save, delete, and activate (records active_meta_profile in settings). This lets clients (e.g. Agent Canvas) manage meta-profiles for the classify_and_switch_llm routing tool, which previously could only have its active_meta_profile set via PATCH /api/settings. Co-authored-by: openhands --- .../openhands/agent_server/api.py | 2 + .../agent_server/meta_profiles_router.py | 224 ++++++++++++++++++ openhands-sdk/openhands/sdk/llm/__init__.py | 2 + .../openhands/sdk/llm/meta_profile_store.py | 93 +++++++- .../agent_server/test_meta_profiles_router.py | 172 ++++++++++++++ tests/sdk/llm/test_meta_profile_store.py | 77 ++++++ 6 files changed, 569 insertions(+), 1 deletion(-) create mode 100644 openhands-agent-server/openhands/agent_server/meta_profiles_router.py create mode 100644 tests/agent_server/test_meta_profiles_router.py diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py index 1ff882a20f..067fc02aec 100644 --- a/openhands-agent-server/openhands/agent_server/api.py +++ b/openhands-agent-server/openhands/agent_server/api.py @@ -39,6 +39,7 @@ from openhands.agent_server.hooks_router import hooks_router from openhands.agent_server.llm_router import llm_router from openhands.agent_server.mcp_router import mcp_router +from openhands.agent_server.meta_profiles_router import meta_profiles_router from openhands.agent_server.middleware import CORSDispatcher from openhands.agent_server.openai.router import ( create_openai_api_key_dependency, @@ -316,6 +317,7 @@ def _add_api_routes(app: FastAPI, config: Config) -> None: api_router.include_router(settings_router) api_router.include_router(workspaces_router) api_router.include_router(profiles_router) + api_router.include_router(meta_profiles_router) # /api/auth/* mints workspace cookies and requires the header to bootstrap, # so it lives under the header-only auth group. api_router.include_router(auth_router) diff --git a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py new file mode 100644 index 0000000000..e449baeaa4 --- /dev/null +++ b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py @@ -0,0 +1,224 @@ +"""HTTP endpoints for managing meta-profiles (model-routing configurations). + +A meta-profile names a ``classifier_model``, a ``default_model`` and a list of +task ``classes``. Every model reference is the name of a saved LLM profile, so +this router intentionally mirrors :mod:`profiles_router` but stores plain JSON +documents (no secrets) via :class:`MetaProfileStore`. +""" + +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Annotated + +from fastapi import APIRouter, HTTPException, Path, Request, status +from pydantic import BaseModel + +from openhands.agent_server._secrets_exposure import get_config +from openhands.agent_server.persistence import ( + PersistedSettings, + get_settings_store, +) +from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_PATTERN +from openhands.sdk.llm.meta_profile_store import ( + MetaProfile, + MetaProfileLimitExceeded, + MetaProfileStore, +) +from openhands.sdk.logger import get_logger + + +logger = get_logger(__name__) + +meta_profiles_router = APIRouter(prefix="/meta-profiles", tags=["Meta-profiles"]) + +MAX_META_PROFILES = 50 + +MetaProfileName = Annotated[ + str, + Path(min_length=1, max_length=64, pattern=PROFILE_NAME_PATTERN), +] + + +class MetaProfileInfo(BaseModel): + name: str + classifier_model: str | None = None + default_model: str | None = None + num_classes: int = 0 + + +class MetaProfileListResponse(BaseModel): + meta_profiles: list[MetaProfileInfo] + active_meta_profile: str | None = None + + +class MetaProfileDetailResponse(BaseModel): + name: str + config: MetaProfile + + +class MetaProfileMutationResponse(BaseModel): + name: str + message: str + + +class ActivateMetaProfileResponse(BaseModel): + name: str + message: str + + +@contextmanager +def _store_errors() -> Iterator[None]: + """Map ``MetaProfileStore`` validation errors to HTTP responses.""" + try: + yield + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + +def _set_active_meta_profile_if_matches( + request: Request, old_name: str, new_name: str | None +) -> bool: + config = get_config(request) + settings_store = get_settings_store(config) + settings = settings_store.load() or PersistedSettings() + if settings.active_meta_profile != old_name: + return False + + def update_active(settings: PersistedSettings) -> PersistedSettings: + settings.active_meta_profile = new_name + return settings + + settings_store.update(update_active) + return True + + +@meta_profiles_router.get("", response_model=MetaProfileListResponse) +async def list_meta_profiles(request: Request) -> MetaProfileListResponse: + """List all saved meta-profiles and the currently active one.""" + config = get_config(request) + settings_store = get_settings_store(config) + settings = settings_store.load() or PersistedSettings() + + store = MetaProfileStore() + with _store_errors(): + summaries = store.list_summaries() + + return MetaProfileListResponse( + meta_profiles=[MetaProfileInfo(**s) for s in summaries], + active_meta_profile=settings.active_meta_profile, + ) + + +@meta_profiles_router.get("/{name}", response_model=MetaProfileDetailResponse) +async def get_meta_profile(name: MetaProfileName) -> MetaProfileDetailResponse: + """Get a meta-profile's full configuration.""" + store = MetaProfileStore() + try: + with _store_errors(): + meta_profile = store.load(name) + except FileNotFoundError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Meta-profile '{name}' not found", + ) + + return MetaProfileDetailResponse(name=name, config=meta_profile) + + +@meta_profiles_router.post( + "/{name}", + response_model=MetaProfileMutationResponse, + status_code=status.HTTP_201_CREATED, +) +async def save_meta_profile( + name: MetaProfileName, + body: MetaProfile, +) -> MetaProfileMutationResponse: + """Save (create or overwrite) a meta-profile. + + Returns 409 if creating a new meta-profile would exceed + ``MAX_META_PROFILES``. + """ + store = MetaProfileStore() + try: + with _store_errors(): + store.save(name, body, max_profiles=MAX_META_PROFILES) + except MetaProfileLimitExceeded: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=( + f"Meta-profile limit reached ({MAX_META_PROFILES}). " + "Delete a meta-profile before saving a new one." + ), + ) + + logger.info(f"Saved meta-profile '{name}'") + return MetaProfileMutationResponse( + name=name, message=f"Meta-profile '{name}' saved" + ) + + +@meta_profiles_router.delete("/{name}", response_model=MetaProfileMutationResponse) +async def delete_meta_profile( + request: Request, name: MetaProfileName +) -> MetaProfileMutationResponse: + """Delete a meta-profile (idempotent). + + If the deleted meta-profile is the active one, ``active_meta_profile`` is + cleared. + """ + store = MetaProfileStore() + with _store_errors(): + store.delete(name) + if _set_active_meta_profile_if_matches(request, name, None): + logger.info(f"Cleared active_meta_profile for deleted meta-profile '{name}'") + logger.info(f"Deleted meta-profile '{name}'") + return MetaProfileMutationResponse( + name=name, message=f"Meta-profile '{name}' deleted" + ) + + +@meta_profiles_router.post( + "/{name}/activate", response_model=ActivateMetaProfileResponse +) +async def activate_meta_profile( + request: Request, name: MetaProfileName +) -> ActivateMetaProfileResponse: + """Activate a meta-profile by recording it as ``active_meta_profile``. + + Unlike LLM profiles, activating a meta-profile does not mutate the agent's + LLM config — it only records which meta-profile the + ``classify_and_switch_llm`` tool should route with. Returns 404 if the + meta-profile does not exist. + """ + # Verify the meta-profile exists (and is valid) before activating. + store = MetaProfileStore() + try: + with _store_errors(): + store.load(name) + except FileNotFoundError: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Meta-profile '{name}' not found", + ) + + config = get_config(request) + settings_store = get_settings_store(config) + + def apply_active(settings: PersistedSettings) -> PersistedSettings: + settings.active_meta_profile = name + return settings + + try: + settings_store.update(apply_active) + except (OSError, PermissionError): + logger.error("Failed to activate meta-profile - file I/O error") + raise HTTPException(status_code=500, detail="Failed to activate meta-profile") + + logger.info(f"Activated meta-profile '{name}'") + return ActivateMetaProfileResponse( + name=name, message=f"Meta-profile '{name}' activated" + ) diff --git a/openhands-sdk/openhands/sdk/llm/__init__.py b/openhands-sdk/openhands/sdk/llm/__init__.py index 26bd6a158a..bd42203fb7 100644 --- a/openhands-sdk/openhands/sdk/llm/__init__.py +++ b/openhands-sdk/openhands/sdk/llm/__init__.py @@ -22,6 +22,7 @@ from openhands.sdk.llm.meta_profile_store import ( MetaProfile, MetaProfileClass, + MetaProfileLimitExceeded, MetaProfileStore, ) from openhands.sdk.llm.router import RouterLLM @@ -53,6 +54,7 @@ "LLMProfileStore", "MetaProfile", "MetaProfileClass", + "MetaProfileLimitExceeded", "MetaProfileStore", "RouterLLM", "RegistryEvent", diff --git a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py index 1d4082f27d..3b80933695 100644 --- a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py +++ b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py @@ -14,8 +14,9 @@ from __future__ import annotations import json +import tempfile from pathlib import Path -from typing import Final +from typing import Any, Final from pydantic import BaseModel, Field @@ -28,6 +29,10 @@ logger = get_logger(__name__) +class MetaProfileLimitExceeded(Exception): + """Raised when saving would exceed the configured meta-profile limit.""" + + class MetaProfileClass(BaseModel): """A single task category and the LLM profile that should handle it.""" @@ -108,3 +113,89 @@ def load(self, name: str) -> MetaProfile: return MetaProfile.model_validate(data) except Exception as e: raise ValueError(f"Failed to load meta-profile `{name}`: {e}") from e + + def save( + self, + name: str, + meta_profile: MetaProfile, + *, + max_profiles: int | None = None, + ) -> None: + """Persist a meta-profile under ``name`` (atomic write, overwrites). + + Args: + name: Name of the meta-profile to save. + meta_profile: The meta-profile to persist. + max_profiles: Optional cap on the number of meta-profiles. When set, + raises :class:`MetaProfileLimitExceeded` if creating a *new* + meta-profile would exceed the limit. + + Raises: + MetaProfileLimitExceeded: If ``max_profiles`` would be exceeded. + ValueError: If ``name`` is not a valid meta-profile name. + """ + path = self._get_path(name) + + if max_profiles is not None and not path.exists(): + if len(self.list()) >= max_profiles: + raise MetaProfileLimitExceeded( + f"Meta-profile limit reached ({max_profiles})." + ) + + profile_json = json.dumps(meta_profile.model_dump(mode="json"), indent=2) + with tempfile.NamedTemporaryFile( + mode="w", + dir=self.base_dir, + suffix=".tmp", + delete=False, + encoding="utf-8", + ) as tmp: + tmp.write(profile_json) + tmp_path = Path(tmp.name) + + try: + Path.replace(tmp_path, path) + except Exception: + tmp_path.unlink(missing_ok=True) + raise + logger.info(f"Saved meta-profile `{name}` at {path}") + + def delete(self, name: str) -> None: + """Delete a meta-profile (idempotent — missing names are a no-op). + + Raises: + ValueError: If ``name`` is not a valid meta-profile name. + """ + path = self._get_path(name) + if not path.exists(): + logger.info(f"Meta-profile `{name}` not found. Skipping delete.") + return + path.unlink() + logger.info(f"Deleted meta-profile `{name}`") + + def list_summaries(self) -> list[dict[str, Any]]: + """List meta-profile metadata without full schema validation. + + Files with corrupted JSON or non-dict top-level values are skipped with + a warning so a single bad file never breaks the listing. + """ + summaries: list[dict[str, Any]] = [] + for name in self.list(): + try: + data = json.loads(self._get_path(name).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as e: + logger.warning(f"Skipping corrupted meta-profile {name!r}: {e}") + continue + if not isinstance(data, dict): + logger.warning(f"Skipping non-dict meta-profile {name!r}") + continue + classes = data.get("classes") or [] + summaries.append( + { + "name": name, + "classifier_model": data.get("classifier_model"), + "default_model": data.get("default_model"), + "num_classes": len(classes) if isinstance(classes, list) else 0, + } + ) + return summaries diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py new file mode 100644 index 0000000000..1951c3d224 --- /dev/null +++ b/tests/agent_server/test_meta_profiles_router.py @@ -0,0 +1,172 @@ +"""Tests for meta_profiles_router endpoints.""" + +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from openhands.agent_server.api import create_app +from openhands.agent_server.config import Config +from openhands.agent_server.persistence import reset_stores +from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore + + +@pytest.fixture +def temp_meta_profiles_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + meta_dir = Path(tmpdir) / "meta-profiles" + meta_dir.mkdir(parents=True, exist_ok=True) + yield meta_dir + + +@pytest.fixture +def temp_settings_dir(): + with tempfile.TemporaryDirectory() as tmpdir: + settings_dir = Path(tmpdir) / "settings" + settings_dir.mkdir(parents=True, exist_ok=True) + yield settings_dir + + +@pytest.fixture +def client(temp_meta_profiles_dir, temp_settings_dir, monkeypatch): + reset_stores() + monkeypatch.setenv("OH_PERSISTENCE_DIR", str(temp_settings_dir)) + config = Config(static_files_path=None, session_api_keys=[], secret_key=None) + app = create_app(config) + with patch( + "openhands.agent_server.meta_profiles_router.MetaProfileStore", + lambda: MetaProfileStore(base_dir=temp_meta_profiles_dir), + ): + yield TestClient(app) + reset_stores() + + +@pytest.fixture +def store(temp_meta_profiles_dir): + return MetaProfileStore(base_dir=temp_meta_profiles_dir) + + +def _meta(classifier="minimax", default="gpt", classes=None) -> dict: + return MetaProfile( + classifier_model=classifier, + default_model=default, + classes=classes or [{"description": "UI", "model": "deepseek"}], + ).model_dump(mode="json") + + +# ── List ──────────────────────────────────────────────────────────────────── + + +def test_list_empty(client): + response = client.get("/api/meta-profiles") + assert response.status_code == 200 + body = response.json() + assert body["meta_profiles"] == [] + assert body["active_meta_profile"] is None + + +def test_list_returns_summaries(client, store): + store.save("balanced", MetaProfile.model_validate(_meta())) + response = client.get("/api/meta-profiles") + assert response.status_code == 200 + body = response.json() + assert body["meta_profiles"] == [ + { + "name": "balanced", + "classifier_model": "minimax", + "default_model": "gpt", + "num_classes": 1, + } + ] + + +# ── Get ────────────────────────────────────────────────────────────────────── + + +def test_get_meta_profile(client, store): + store.save("balanced", MetaProfile.model_validate(_meta())) + response = client.get("/api/meta-profiles/balanced") + assert response.status_code == 200 + body = response.json() + assert body["name"] == "balanced" + assert body["config"]["classifier_model"] == "minimax" + assert body["config"]["classes"][0]["model"] == "deepseek" + + +def test_get_missing_returns_404(client): + response = client.get("/api/meta-profiles/nope") + assert response.status_code == 404 + + +# ── Save ───────────────────────────────────────────────────────────────────── + + +def test_save_creates_meta_profile(client, store): + response = client.post("/api/meta-profiles/balanced", json=_meta()) + assert response.status_code == 201 + assert store.load("balanced").classifier_model == "minimax" + + +def test_save_overwrites(client, store): + client.post("/api/meta-profiles/p", json=_meta(classifier="a", default="b")) + response = client.post( + "/api/meta-profiles/p", json=_meta(classifier="c", default="d") + ) + assert response.status_code == 201 + assert store.load("p").classifier_model == "c" + + +def test_save_invalid_body_returns_422(client): + response = client.post("/api/meta-profiles/p", json={"default_model": "gpt"}) + assert response.status_code == 422 + + +def test_save_invalid_name_returns_422(client): + response = client.post("/api/meta-profiles/..bad..", json=_meta()) + assert response.status_code == 422 + + +# ── Delete ─────────────────────────────────────────────────────────────────── + + +def test_delete_meta_profile(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + response = client.delete("/api/meta-profiles/p") + assert response.status_code == 200 + assert store.list() == [] + + +def test_delete_is_idempotent(client): + response = client.delete("/api/meta-profiles/nope") + assert response.status_code == 200 + + +def test_delete_active_clears_active(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + activate = client.post("/api/meta-profiles/p/activate") + assert activate.status_code == 200 + + client.delete("/api/meta-profiles/p") + + listed = client.get("/api/meta-profiles").json() + assert listed["active_meta_profile"] is None + + +# ── Activate ───────────────────────────────────────────────────────────────── + + +def test_activate_sets_active_meta_profile(client, store): + store.save("p", MetaProfile.model_validate(_meta())) + response = client.post("/api/meta-profiles/p/activate") + assert response.status_code == 200 + assert response.json()["name"] == "p" + + listed = client.get("/api/meta-profiles").json() + assert listed["active_meta_profile"] == "p" + + +def test_activate_missing_returns_404(client): + response = client.post("/api/meta-profiles/nope/activate") + assert response.status_code == 404 diff --git a/tests/sdk/llm/test_meta_profile_store.py b/tests/sdk/llm/test_meta_profile_store.py index ef81c0fb87..890a1e9cfa 100644 --- a/tests/sdk/llm/test_meta_profile_store.py +++ b/tests/sdk/llm/test_meta_profile_store.py @@ -5,6 +5,8 @@ from openhands.sdk.llm.meta_profile_store import ( MetaProfile, + MetaProfileClass, + MetaProfileLimitExceeded, MetaProfileStore, ) @@ -80,3 +82,78 @@ def test_load_schema_violation_raises_value_error(tmp_path: Path) -> None: with pytest.raises(ValueError): store.load("bad") + + +def test_save_then_load_roundtrip(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + meta = MetaProfile( + classifier_model="minimax", + default_model="gpt", + classes=[MetaProfileClass(description="UI / images", model="deepseek")], + ) + + store.save("balanced", meta) + + loaded = store.load("balanced") + assert loaded.classifier_model == "minimax" + assert loaded.default_model == "gpt" + assert [c.model for c in loaded.classes] == ["deepseek"] + + +def test_save_overwrites_existing(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("p", MetaProfile(classifier_model="a", default_model="b")) + store.save("p", MetaProfile(classifier_model="c", default_model="d")) + + assert store.load("p").classifier_model == "c" + assert store.list() == ["p"] + + +def test_save_invalid_name_raises_value_error(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + + with pytest.raises(ValueError): + store.save("../escape", MetaProfile(classifier_model="a", default_model="b")) + + +def test_save_respects_max_profiles(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("a", MetaProfile(classifier_model="a", default_model="b")) + + with pytest.raises(MetaProfileLimitExceeded): + store.save( + "b", MetaProfile(classifier_model="a", default_model="b"), max_profiles=1 + ) + + # Overwriting an existing one is still allowed at the limit. + store.save( + "a", MetaProfile(classifier_model="x", default_model="y"), max_profiles=1 + ) + assert store.load("a").classifier_model == "x" + + +def test_delete_is_idempotent(tmp_path: Path) -> None: + store = MetaProfileStore(base_dir=tmp_path) + store.save("p", MetaProfile(classifier_model="a", default_model="b")) + + store.delete("p") + assert store.list() == [] + # Deleting a missing meta-profile is a no-op. + store.delete("p") + + +def test_list_summaries_skips_corrupted(tmp_path: Path) -> None: + _write(tmp_path, "good", VALID) + (tmp_path / "broken.json").write_text("{not json", encoding="utf-8") + store = MetaProfileStore(base_dir=tmp_path) + + summaries = store.list_summaries() + + assert summaries == [ + { + "name": "good", + "classifier_model": "minimax", + "default_model": "gpt", + "num_classes": 2, + } + ] From 6468a931d8b3ac7f41c7e733d076b47185bbb0f0 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 16 Jun 2026 17:09:07 -0300 Subject: [PATCH 4/8] Address review: wire meta-profile activation, defer load, lock store Fixes from PR review of the meta-profile management surface: 1. Activation/clear now route through PersistedSettings.update({...}) instead of a direct field assignment in the settings-store callback. Direct assignment only set the top-level active_meta_profile and never propagated into agent_settings (active_meta_profile + enable_classify_and_switch_llm_tool), so /activate reported success while the routing tool was never attached. The delete helper had the same top-level/nested drift. (meta_profiles_router.py) 2. ClassifyAndSwitchLLMTool defers meta-profile resolution from create() to execution time. A missing/renamed file under ~/.openhands/ meta-profiles no longer breaks conversation startup (_ensure_agent_ready); it surfaces as a tool error observation instead. 3. MetaProfileStore.save()/delete() now hold a FileLock across the pr pr pr pr pr pr pr pr pr pr pr pr pr pr concurre pr pr pr pr pr pr pr pr pr pr pr pr pr : a pr pr pr pr pr pr pr pr pr pr pr pgation (not just the facade), guarding against drift. - Tool: create() no longer touches disk / raises on missing profiles; invocation reports the error; a missing active meta-profile does not break _ensure_agent_ready(). - Store: concurrent saves respect max_profiles under the lock. Co-authored-by: openhands --- .../agent_server/meta_profiles_router.py | 12 ++- .../openhands/sdk/llm/meta_profile_store.py | 81 +++++++++++------ .../tool/builtins/classify_and_switch_llm.py | 63 +++++++++---- .../agent_server/test_meta_profiles_router.py | 39 +++++++- tests/sdk/llm/test_meta_profile_store.py | 29 ++++++ .../sdk/tool/test_classify_and_switch_llm.py | 90 +++++++++++++++++-- 6 files changed, 260 insertions(+), 54 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py index e449baeaa4..e24fc3b3a4 100644 --- a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py +++ b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py @@ -88,7 +88,11 @@ def _set_active_meta_profile_if_matches( return False def update_active(settings: PersistedSettings) -> PersistedSettings: - settings.active_meta_profile = new_name + # Route through PersistedSettings.update() so the change also + # propagates into agent_settings (active_meta_profile + + # enable_classify_and_switch_llm_tool); a direct field assignment + # would leave that nested state stale. + settings.update({"active_meta_profile": new_name}) return settings settings_store.update(update_active) @@ -209,7 +213,11 @@ async def activate_meta_profile( settings_store = get_settings_store(config) def apply_active(settings: PersistedSettings) -> PersistedSettings: - settings.active_meta_profile = name + # Route through PersistedSettings.update() so activation also wires + # agent_settings (active_meta_profile + enable_classify_and_switch_llm_tool), + # which is what actually attaches the routing tool. A direct field + # assignment would record the active name but never enable the tool. + settings.update({"active_meta_profile": name}) return settings try: diff --git a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py index 3b80933695..73f9640baa 100644 --- a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py +++ b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py @@ -15,9 +15,12 @@ import json import tempfile +from collections.abc import Iterator +from contextlib import contextmanager from pathlib import Path from typing import Any, Final +from filelock import FileLock, Timeout from pydantic import BaseModel, Field from openhands.sdk.llm.llm_profile_store import PROFILE_NAME_REGEX @@ -25,6 +28,7 @@ _DEFAULT_META_PROFILE_DIR: Final[Path] = Path.home() / ".openhands" / "meta-profiles" +_LOCK_TIMEOUT_SECONDS: Final[float] = 30.0 logger = get_logger(__name__) @@ -75,6 +79,28 @@ def __init__(self, base_dir: Path | str | None = None) -> None: Path(base_dir) if base_dir is not None else _DEFAULT_META_PROFILE_DIR ) self.base_dir.mkdir(parents=True, exist_ok=True) + self._file_lock = FileLock(self.base_dir / ".meta-profiles.lock") + + @contextmanager + def _acquire_lock(self, timeout: float = _LOCK_TIMEOUT_SECONDS) -> Iterator[None]: + """Acquire the file lock for safe concurrent access. + + The lock is reentrant within a process, so methods that hold it may + call other locked methods (e.g. ``save`` calls ``list``). + + Raises: + TimeoutError: If the lock cannot be acquired within ``timeout``. + """ + try: + with self._file_lock.acquire(timeout=timeout): + yield + except Timeout: + logger.error( + f"[Meta-profile Store] Failed to acquire lock within {timeout}s" + ) + raise TimeoutError( + f"Meta-profile store lock acquisition timed out after {timeout}s" + ) def list(self) -> list[str]: """Return the names (without ``.json``) of all stored meta-profiles.""" @@ -136,28 +162,32 @@ def save( """ path = self._get_path(name) - if max_profiles is not None and not path.exists(): - if len(self.list()) >= max_profiles: - raise MetaProfileLimitExceeded( - f"Meta-profile limit reached ({max_profiles})." - ) - - profile_json = json.dumps(meta_profile.model_dump(mode="json"), indent=2) - with tempfile.NamedTemporaryFile( - mode="w", - dir=self.base_dir, - suffix=".tmp", - delete=False, - encoding="utf-8", - ) as tmp: - tmp.write(profile_json) - tmp_path = Path(tmp.name) + # Hold the lock across the precondition check and the atomic replace so + # concurrent creators cannot both pass the limit check and overshoot + # ``max_profiles`` (TOCTOU), mirroring ``LLMProfileStore``. + with self._acquire_lock(): + if max_profiles is not None and not path.exists(): + if len(self.list()) >= max_profiles: + raise MetaProfileLimitExceeded( + f"Meta-profile limit reached ({max_profiles})." + ) + + profile_json = json.dumps(meta_profile.model_dump(mode="json"), indent=2) + with tempfile.NamedTemporaryFile( + mode="w", + dir=self.base_dir, + suffix=".tmp", + delete=False, + encoding="utf-8", + ) as tmp: + tmp.write(profile_json) + tmp_path = Path(tmp.name) - try: - Path.replace(tmp_path, path) - except Exception: - tmp_path.unlink(missing_ok=True) - raise + try: + Path.replace(tmp_path, path) + except Exception: + tmp_path.unlink(missing_ok=True) + raise logger.info(f"Saved meta-profile `{name}` at {path}") def delete(self, name: str) -> None: @@ -167,10 +197,11 @@ def delete(self, name: str) -> None: ValueError: If ``name`` is not a valid meta-profile name. """ path = self._get_path(name) - if not path.exists(): - logger.info(f"Meta-profile `{name}` not found. Skipping delete.") - return - path.unlink() + with self._acquire_lock(): + if not path.exists(): + logger.info(f"Meta-profile `{name}` not found. Skipping delete.") + return + path.unlink() logger.info(f"Deleted meta-profile `{name}`") def list_summaries(self) -> list[dict[str, Any]]: diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py index 72895569c6..8090ef7352 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -131,8 +131,38 @@ def _recent_messages_text( class ClassifyAndSwitchLLMExecutor(ToolExecutor): - def __init__(self, meta_profile: MetaProfile) -> None: - self.meta_profile = meta_profile + def __init__( + self, + meta_profile_store: MetaProfileStore, + active_meta_profile: str | None = None, + ) -> None: + # Resolve the meta-profile lazily (at invocation), not at construction, + # so a missing/renamed file under ~/.openhands/meta-profiles produces a + # tool error instead of breaking conversation startup. + self._store = meta_profile_store + self._active_meta_profile = active_meta_profile + + def _resolve_meta_profile(self) -> MetaProfile: + """Resolve the active meta-profile, falling back to the first available. + + Raises: + FileNotFoundError: If no meta-profile can be resolved. + ValueError: If the resolved meta-profile is invalid. + """ + name = self._active_meta_profile + if not name: + available = self._store.list() + if not available: + raise FileNotFoundError( + "No meta-profile is active and none are available in the " + "meta-profile store." + ) + name = available[0] + logger.info( + "No active meta-profile set; falling back to first available: %r", + name, + ) + return self._store.load(name) def __call__( self, @@ -147,7 +177,13 @@ def __call__( is_error=True, ) - meta = self.meta_profile + try: + meta = self._resolve_meta_profile() + except (FileNotFoundError, ValueError) as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=f"Failed to resolve the active meta-profile: {exc}", + is_error=True, + ) # 1) Load the classifier LLM (a saved profile), respecting at-rest cipher. try: classifier_llm = conversation._profile_store.load( @@ -263,28 +299,17 @@ def create( "and 'meta_profile_store'." ) + # Meta-profile resolution is deferred to invocation time (see + # ClassifyAndSwitchLLMExecutor) so user-managed files under + # ~/.openhands/meta-profiles cannot break conversation startup; a + # missing/invalid profile surfaces as a tool error instead. store = meta_profile_store or MetaProfileStore() - name = active_meta_profile - if not name: - # No meta-profile is active: fall back to the first available one. - available = store.list() - if not available: - raise ValueError( - "ClassifyAndSwitchLLMTool requires at least one meta-profile, " - "but none were found in the meta-profile store." - ) - name = available[0] - logger.info( - "No active meta-profile set; falling back to first available: %r", - name, - ) - meta = store.load(name) return [ cls( description=_DESCRIPTION, action_type=ClassifyAndSwitchLLMAction, observation_type=ClassifyAndSwitchLLMObservation, - executor=ClassifyAndSwitchLLMExecutor(meta), + executor=ClassifyAndSwitchLLMExecutor(store, active_meta_profile), annotations=ToolAnnotations( readOnlyHint=False, destructiveHint=False, diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py index 1951c3d224..de531eff57 100644 --- a/tests/agent_server/test_meta_profiles_router.py +++ b/tests/agent_server/test_meta_profiles_router.py @@ -9,7 +9,7 @@ from openhands.agent_server.api import create_app from openhands.agent_server.config import Config -from openhands.agent_server.persistence import reset_stores +from openhands.agent_server.persistence import get_settings_store, reset_stores from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore @@ -154,6 +154,24 @@ def test_delete_active_clears_active(client, store): assert listed["active_meta_profile"] is None +def test_delete_active_clears_nested_agent_settings(client, store): + """Deleting the active meta-profile must clear the *nested* agent settings. + + Otherwise the routing tool stays enabled pointing at a now-deleted profile, + which breaks the next conversation. Guards against top-level/nested drift. + """ + store.save("p", MetaProfile.model_validate(_meta())) + client.post("/api/meta-profiles/p/activate") + + client.delete("/api/meta-profiles/p") + + persisted = get_settings_store().load() + assert persisted is not None + agent = persisted.agent_settings + assert agent.active_meta_profile is None + assert agent.enable_classify_and_switch_llm_tool is False + + # ── Activate ───────────────────────────────────────────────────────────────── @@ -167,6 +185,25 @@ def test_activate_sets_active_meta_profile(client, store): assert listed["active_meta_profile"] == "p" +def test_activate_propagates_into_agent_settings(client, store): + """Activation must wire the *nested* agent settings, not just the facade. + + ``enable_classify_and_switch_llm_tool`` / ``active_meta_profile`` on + ``agent_settings`` are what actually attach the routing tool; if only the + top-level field is set, activation reports success but does nothing. + """ + store.save("p", MetaProfile.model_validate(_meta())) + response = client.post("/api/meta-profiles/p/activate") + assert response.status_code == 200 + + persisted = get_settings_store().load() + assert persisted is not None + assert persisted.active_meta_profile == "p" + agent = persisted.agent_settings + assert agent.active_meta_profile == "p" + assert agent.enable_classify_and_switch_llm_tool is True + + def test_activate_missing_returns_404(client): response = client.post("/api/meta-profiles/nope/activate") assert response.status_code == 404 diff --git a/tests/sdk/llm/test_meta_profile_store.py b/tests/sdk/llm/test_meta_profile_store.py index 890a1e9cfa..acc81fc452 100644 --- a/tests/sdk/llm/test_meta_profile_store.py +++ b/tests/sdk/llm/test_meta_profile_store.py @@ -1,4 +1,5 @@ import json +from concurrent.futures import ThreadPoolExecutor from pathlib import Path import pytest @@ -132,6 +133,34 @@ def test_save_respects_max_profiles(tmp_path: Path) -> None: assert store.load("a").classifier_model == "x" +def test_concurrent_saves_respect_max_profiles(tmp_path: Path) -> None: + """The file lock must serialize the limit check + write across threads. + + Without locking, concurrent creators can each pass the ``max_profiles`` + check before any write lands and overshoot the limit (TOCTOU). + """ + store = MetaProfileStore(base_dir=tmp_path) + limit = 10 + attempts = 30 + + def attempt(i: int) -> bool: + try: + store.save( + f"p{i:02d}", + MetaProfile(classifier_model="a", default_model="b"), + max_profiles=limit, + ) + return True + except MetaProfileLimitExceeded: + return False + + with ThreadPoolExecutor(max_workers=attempts) as pool: + results = list(pool.map(attempt, range(attempts))) + + assert sum(results) == limit + assert len(store.list()) == limit + + def test_delete_is_idempotent(tmp_path: Path) -> None: store = MetaProfileStore(base_dir=tmp_path) store.save("p", MetaProfile(classifier_model="a", default_model="b")) diff --git a/tests/sdk/tool/test_classify_and_switch_llm.py b/tests/sdk/tool/test_classify_and_switch_llm.py index 301b6acd27..b28a6e94f5 100644 --- a/tests/sdk/tool/test_classify_and_switch_llm.py +++ b/tests/sdk/tool/test_classify_and_switch_llm.py @@ -132,10 +132,11 @@ def test_recent_messages_text_takes_last_n() -> None: def test_create_falls_back_to_first_when_no_active_profile( meta_store: MetaProfileStore, ) -> None: - # "balanced" is the only profile, so it is the first one found. + # "balanced" is the only profile, so it is the first one resolved. + # Resolution is deferred to invocation time, so check the resolver directly. tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) - assert tool.executor.meta_profile.classifier_model == "classifier" + assert tool.executor._resolve_meta_profile().classifier_model == "classifier" def test_create_picks_alphabetically_first_meta_profile( @@ -150,13 +151,28 @@ def test_create_picks_alphabetically_first_meta_profile( assert meta_store.list()[0] == "aaa" tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=meta_store)[0] assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) - assert tool.executor.meta_profile.classifier_model == "other-classifier" + assert tool.executor._resolve_meta_profile().classifier_model == "other-classifier" -def test_create_raises_when_no_meta_profiles_exist(tmp_path: Path) -> None: +def test_create_does_not_touch_disk_when_no_meta_profiles_exist( + tmp_path: Path, +) -> None: + # Deferred resolution: create() must not read the store, so an empty (or + # missing) meta-profile dir cannot break agent/conversation startup. empty_store = MetaProfileStore(base_dir=tmp_path / "empty") - with pytest.raises(ValueError): - ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store) + tools = ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store) + assert len(tools) == 1 + assert isinstance(tools[0].executor, ClassifyAndSwitchLLMExecutor) + + +def test_create_does_not_load_missing_active_meta_profile( + meta_store: MetaProfileStore, +) -> None: + # A dangling active_meta_profile must not raise at create() time. + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="does-not-exist", meta_profile_store=meta_store + )[0] + assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) def test_create_rejects_unknown_params(meta_store: MetaProfileStore) -> None: @@ -174,7 +190,7 @@ def test_create_loads_meta_profile(meta_store: MetaProfileStore) -> None: )[0] assert tool.name == "classify_and_switch_llm" assert isinstance(tool.executor, ClassifyAndSwitchLLMExecutor) - assert tool.executor.meta_profile.classifier_model == "classifier" + assert tool.executor._resolve_meta_profile().classifier_model == "classifier" # ── executor ────────────────────────────────────────────────────────────── @@ -261,6 +277,66 @@ def test_executor_errors_when_target_profile_missing( assert "not found" in obs.text +def test_executor_errors_when_no_meta_profile_available( + profile_store, tmp_path +) -> None: + # Empty store + no active profile: invocation (not startup) reports the error. + empty_store = MetaProfileStore(base_dir=tmp_path / "empty") + conversation = _make_conversation() + tool = ClassifyAndSwitchLLMTool.create(meta_profile_store=empty_store)[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "no meta-profile" in obs.text.lower() + assert conversation.agent.llm.model == "default-model" + + +def test_executor_errors_when_active_meta_profile_missing( + profile_store, meta_store +) -> None: + # Dangling active_meta_profile: invocation reports the error, no crash. + conversation = _make_conversation() + tool = ClassifyAndSwitchLLMTool.create( + active_meta_profile="does-not-exist", meta_profile_store=meta_store + )[0] + conversation._ensure_agent_ready() + conversation.agent.tools_map[tool.name] = tool + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert obs.is_error + assert "resolve" in obs.text.lower() + + +def test_missing_active_meta_profile_does_not_break_startup( + meta_store, monkeypatch +) -> None: + # The whole point of deferring the load: a missing active meta-profile must + # not break _ensure_agent_ready() / conversation startup. + monkeypatch.setattr( + "openhands.sdk.llm.meta_profile_store._DEFAULT_META_PROFILE_DIR", + meta_store.base_dir, + ) + agent = OpenHandsAgentSettings( + llm=_make_llm("default-model", "default"), + enable_classify_and_switch_llm_tool=True, + active_meta_profile="does-not-exist", + ).create_agent() + conversation = LocalConversation(agent=agent, workspace=Path.cwd()) + + # Must not raise even though the active meta-profile file does not exist. + conversation._ensure_agent_ready() + + assert any(t.name == "ClassifyAndSwitchLLMTool" for t in agent.tools) + + # ── create_agent wiring ───────────────────────────────────────────────────── From 3333e48dd3b77c8d01deccabb2a70de9e270ecc3 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 17 Jun 2026 16:18:01 -0300 Subject: [PATCH 5/8] fix(test): satisfy pyright after main merge in meta-profiles router tests The merge with main turned PersistedSettings.agent_settings into a kind-discriminated union (OpenHandsAgentSettings | ACPAgentSettings), so pyright rejected the direct access to active_meta_profile / enable_classify_and_switch_llm_tool on the union, and rejected the list[dict] passed to MetaProfile(classes=...). - Build the fixture profile via MetaProfile.model_validate({...}) so the classes payload is validated as Any (still coerced to MetaProfileClass). - Narrow agent_settings with isinstance(OpenHandsAgentSettings) before asserting the routing fields (the routing tool only applies to the OpenHands agent variant anyway). Co-authored-by: openhands --- tests/agent_server/test_meta_profiles_router.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py index de531eff57..fdbf39a43b 100644 --- a/tests/agent_server/test_meta_profiles_router.py +++ b/tests/agent_server/test_meta_profiles_router.py @@ -11,6 +11,7 @@ from openhands.agent_server.config import Config from openhands.agent_server.persistence import get_settings_store, reset_stores from openhands.sdk.llm.meta_profile_store import MetaProfile, MetaProfileStore +from openhands.sdk.settings.model import OpenHandsAgentSettings @pytest.fixture @@ -49,10 +50,12 @@ def store(temp_meta_profiles_dir): def _meta(classifier="minimax", default="gpt", classes=None) -> dict: - return MetaProfile( - classifier_model=classifier, - default_model=default, - classes=classes or [{"description": "UI", "model": "deepseek"}], + return MetaProfile.model_validate( + { + "classifier_model": classifier, + "default_model": default, + "classes": classes or [{"description": "UI", "model": "deepseek"}], + } ).model_dump(mode="json") @@ -168,6 +171,7 @@ def test_delete_active_clears_nested_agent_settings(client, store): persisted = get_settings_store().load() assert persisted is not None agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) assert agent.active_meta_profile is None assert agent.enable_classify_and_switch_llm_tool is False @@ -200,6 +204,7 @@ def test_activate_propagates_into_agent_settings(client, store): assert persisted is not None assert persisted.active_meta_profile == "p" agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) assert agent.active_meta_profile == "p" assert agent.enable_classify_and_switch_llm_tool is True From 2f1f55e25fddfddf2370d042cdfa4bae432876ac Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 17 Jun 2026 17:05:51 -0300 Subject: [PATCH 6/8] refactor: address code-review feedback on intelligent model router Non-blocking review suggestions from PR #3744: - meta_profiles_router/meta_profile_store: trim verbose module docstrings that restated the PR description down to the durable invariants. - persistence/models.py: extract the active_meta_profile propagation out of PersistedSettings.update into a _apply_active_meta_profile helper to cut the nesting depth. - classify_and_switch_llm.py: document why _recent_messages_text only consumes MessageEvent, and that the no-active fallback picks the alphabetically-first meta-profile (not most-recent). - meta_profile_store.list(): document the alphabetical (not chronological) ordering that the fallback relies on. - tests: add a concurrent delete(active)+activate(other) race test asserting the deleted profile never stays active and the nested agent_settings never drift from the top-level active_meta_profile. Co-authored-by: openhands --- .../agent_server/meta_profiles_router.py | 8 ++-- .../agent_server/persistence/models.py | 32 ++++++++------- .../openhands/sdk/llm/meta_profile_store.py | 22 +++++------ .../tool/builtins/classify_and_switch_llm.py | 10 ++++- .../agent_server/test_meta_profiles_router.py | 39 +++++++++++++++++++ 5 files changed, 81 insertions(+), 30 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py index e24fc3b3a4..7a1279ec4e 100644 --- a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py +++ b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py @@ -1,9 +1,7 @@ -"""HTTP endpoints for managing meta-profiles (model-routing configurations). +"""HTTP CRUD + activate endpoints for meta-profiles (mirrors profiles_router). -A meta-profile names a ``classifier_model``, a ``default_model`` and a list of -task ``classes``. Every model reference is the name of a saved LLM profile, so -this router intentionally mirrors :mod:`profiles_router` but stores plain JSON -documents (no secrets) via :class:`MetaProfileStore`. +Unlike LLM profiles, meta-profiles hold no secrets — they are plain JSON +documents persisted via :class:`MetaProfileStore`. """ from collections.abc import Iterator diff --git a/openhands-agent-server/openhands/agent_server/persistence/models.py b/openhands-agent-server/openhands/agent_server/persistence/models.py index 87f7065539..620872b49c 100644 --- a/openhands-agent-server/openhands/agent_server/persistence/models.py +++ b/openhands-agent-server/openhands/agent_server/persistence/models.py @@ -255,25 +255,31 @@ def update(self, payload: SettingsUpdatePayload) -> None: if "active_profile" in payload: self.active_profile = payload["active_profile"] - # Update active_meta_profile and propagate it into agent_settings so - # the agent built from these settings enables and uses the routing - # tool (mirrors how activating a profile bakes the LLM into - # agent_settings). ACP agent settings lack these fields, so guard. + # Update active_meta_profile if explicitly provided (incl. None) if "active_meta_profile" in payload: - name = payload["active_meta_profile"] - self.active_meta_profile = name - if "active_meta_profile" in type(self.agent_settings).model_fields: - self.agent_settings = self.agent_settings.model_copy( - update={ - "active_meta_profile": name, - "enable_classify_and_switch_llm_tool": name is not None, - } - ) + self._apply_active_meta_profile(payload["active_meta_profile"]) finally: # Clear conv_merged to minimize plaintext exposure window if conv_merged is not None: conv_merged.clear() + def _apply_active_meta_profile(self, name: str | None) -> None: + """Set ``active_meta_profile`` and propagate it into agent_settings. + + Propagating into the nested ``agent_settings`` is what enables/uses the + routing tool on the agent built from these settings (mirrors how + activating a profile bakes the LLM into ``agent_settings``). ACP agent + settings lack these fields, so guard on their presence. + """ + self.active_meta_profile = name + if "active_meta_profile" in type(self.agent_settings).model_fields: + self.agent_settings = self.agent_settings.model_copy( + update={ + "active_meta_profile": name, + "enable_classify_and_switch_llm_tool": name is not None, + } + ) + @classmethod def from_persisted( cls, data: Any, *, context: dict[str, Any] | None = None diff --git a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py index 73f9640baa..178aa9be6c 100644 --- a/openhands-sdk/openhands/sdk/llm/meta_profile_store.py +++ b/openhands-sdk/openhands/sdk/llm/meta_profile_store.py @@ -1,14 +1,9 @@ -"""Storage for *meta-profiles*: declarative model-routing configurations. +"""JSON-file storage for meta-profiles under ``~/.openhands/meta-profiles``. -A meta-profile describes how to pick an LLM for a task. It names a -``classifier_model`` used to categorize the task, a ``default_model`` to fall -back to, and a list of ``classes`` mapping a natural-language task description -to the model that should handle it. - -Every model reference (``classifier_model``, ``default_model`` and each -class's ``model``) is the *name of a saved LLM profile* in -:class:`~openhands.sdk.llm.llm_profile_store.LLMProfileStore`, so credentials -and provider settings are resolved through the existing profile machinery. +Key invariant: every model reference in a meta-profile (``classifier_model``, +``default_model``, and each class's ``model``) is the *name of a saved LLM +profile* in :class:`~openhands.sdk.llm.llm_profile_store.LLMProfileStore`, not a +raw model string — credentials/provider settings resolve through that store. """ from __future__ import annotations @@ -103,7 +98,12 @@ def _acquire_lock(self, timeout: float = _LOCK_TIMEOUT_SECONDS) -> Iterator[None ) def list(self) -> list[str]: - """Return the names (without ``.json``) of all stored meta-profiles.""" + """Return stored meta-profile names (without ``.json``), sorted. + + The order is alphabetical, not chronological. Callers that fall back to + "the first available" meta-profile (when none is explicitly active) get + the alphabetically-first name, which is stable but not "most recent". + """ return sorted( p.stem for p in self.base_dir.glob("*.json") diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py index 8090ef7352..a90da4e0e0 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -116,7 +116,13 @@ def parse_class_index(text: str, num_classes: int) -> int: def _recent_messages_text( conversation: "LocalConversation", limit: int = _RECENT_MESSAGE_LIMIT ) -> str: - """Return a transcript of the last ``limit`` conversation messages.""" + """Return a transcript of the last ``limit`` conversation messages. + + Only ``MessageEvent`` is included on purpose: user/assistant messages are + the task signal the classifier needs. Action/observation events (tool calls, + outputs) are deliberately excluded — they are noisy, can be large, and don't + describe the task any better than the surrounding messages. + """ from openhands.sdk.event.llm_convertible.message import MessageEvent from openhands.sdk.llm import content_to_str @@ -157,6 +163,8 @@ def _resolve_meta_profile(self) -> MetaProfile: "No meta-profile is active and none are available in the " "meta-profile store." ) + # ``list()`` is alphabetically sorted, so this is the + # alphabetically-first meta-profile, not the most recently saved. name = available[0] logger.info( "No active meta-profile set; falling back to first available: %r", diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py index fdbf39a43b..641a0a4a7c 100644 --- a/tests/agent_server/test_meta_profiles_router.py +++ b/tests/agent_server/test_meta_profiles_router.py @@ -1,6 +1,7 @@ """Tests for meta_profiles_router endpoints.""" import tempfile +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from unittest.mock import patch @@ -212,3 +213,41 @@ def test_activate_propagates_into_agent_settings(client, store): def test_activate_missing_returns_404(client): response = client.post("/api/meta-profiles/nope/activate") assert response.status_code == 404 + + +def test_concurrent_delete_and_activate_keep_settings_consistent(client, store): + """Racing delete(active) against activate(other) must not corrupt settings. + + ``delete`` clears the active meta-profile and ``activate`` sets a new one; + both route through the file-locked settings store. Whatever the interleaving, + the persisted state must stay consistent: the active profile is never the + just-deleted one, and the nested ``agent_settings`` never drifts from the + top-level ``active_meta_profile``. + """ + store.save("q", MetaProfile.model_validate(_meta())) + + for _ in range(15): + # Re-establish "p" as the active profile before each race. + store.save("p", MetaProfile.model_validate(_meta())) + client.post("/api/meta-profiles/p/activate") + + with ThreadPoolExecutor(max_workers=2) as pool: + futures = [ + pool.submit(client.delete, "/api/meta-profiles/p"), + pool.submit(client.post, "/api/meta-profiles/q/activate"), + ] + for future in futures: + assert future.result().status_code == 200 + + persisted = get_settings_store().load() + assert persisted is not None + active = persisted.active_meta_profile + # "p" was deleted, so it must never remain active; outcome is "q" or None + # depending on which write committed last. + assert active in (None, "q") + + agent = persisted.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + # Nested agent settings must agree with the top-level facade. + assert agent.active_meta_profile == active + assert agent.enable_classify_and_switch_llm_tool is (active is not None) From 716e7dd5cf598bb22d4ac3097810c43aa2c363fd Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 17 Jun 2026 19:04:10 -0300 Subject: [PATCH 7/8] fix: account classifier LLM spend + map meta-profile store timeouts to 503 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on the intelligent model router (PR #3744). CRITICAL — classifier completion now goes through conversation accounting. `classify_and_switch_llm` previously loaded the classifier profile straight from the profile store and called `completion()` on it without registering it in `conversation.llm_registry`. Its tokens/cost therefore never entered `conversation_stats`, so the call was invisible to serialized metrics and escaped the `max_budget_per_run` guard in `LocalConversation`. The classifier LLM is now loaded once and registered under a stable `classifier:` usage id (mirroring the existing `ask-agent-llm` pattern), so its spend is tracked and budget-enforced; repeated routing calls reuse the cached entry. SUGGESTION — meta-profile store lock contention now returns 503. `MetaProfileStore.save()/delete()` can raise `TimeoutError` from the file lock, but `meta_profiles_router._store_errors()` only mapped `ValueError`, yielding a generic 500. It now mirrors `profiles_router._store_errors()` and maps `TimeoutError` to a retryable 503. Tests: - classifier call is recorded in `conversation.conversation_stats` and the registry (regression for the budget bypass), plus a repeated-call test asserting one shared usage bucket. - save/delete endpoints return 503 on store `TimeoutError`. Co-authored-by: openhands --- .../agent_server/meta_profiles_router.py | 10 ++++- .../tool/builtins/classify_and_switch_llm.py | 36 ++++++++++----- .../agent_server/test_meta_profiles_router.py | 24 ++++++++++ .../sdk/tool/test_classify_and_switch_llm.py | 44 +++++++++++++++++++ 4 files changed, 101 insertions(+), 13 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py index 7a1279ec4e..251ac3d3e1 100644 --- a/openhands-agent-server/openhands/agent_server/meta_profiles_router.py +++ b/openhands-agent-server/openhands/agent_server/meta_profiles_router.py @@ -66,9 +66,17 @@ class ActivateMetaProfileResponse(BaseModel): @contextmanager def _store_errors() -> Iterator[None]: - """Map ``MetaProfileStore`` validation errors to HTTP responses.""" + """Map ``MetaProfileStore`` errors to HTTP responses.""" try: yield + except TimeoutError: + # save()/delete() can raise TimeoutError from the file lock under + # contention; surface a retryable 503 instead of a generic 500 + # (mirrors profiles_router._store_errors()). + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Meta-profile store is busy. Please retry.", + ) except ValueError as e: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, diff --git a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py index a90da4e0e0..5d0e1504d2 100644 --- a/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py +++ b/openhands-sdk/openhands/sdk/tool/builtins/classify_and_switch_llm.py @@ -192,19 +192,31 @@ def __call__( text=f"Failed to resolve the active meta-profile: {exc}", is_error=True, ) - # 1) Load the classifier LLM (a saved profile), respecting at-rest cipher. + # 1) Load the classifier LLM (a saved profile), respecting at-rest cipher, + # and register it in the conversation's LLM registry under a stable + # usage id. Registration is what makes the classifier completion's + # tokens/cost flow into ``conversation.conversation_stats`` and count + # against ``max_budget_per_run`` — calling an unregistered LLM would + # spend off the books (mirrors the ``ask-agent-llm`` pattern). Caching + # by usage id means repeated routing calls reuse one metrics bucket. + usage_id = f"classifier:{meta.classifier_model}" try: - classifier_llm = conversation._profile_store.load( - meta.classifier_model, cipher=conversation._cipher - ) - except (FileNotFoundError, ValueError) as exc: - return ClassifyAndSwitchLLMObservation.from_text( - text=( - f"Failed to load classifier profile " - f"'{meta.classifier_model}': {exc}" - ), - is_error=True, - ) + classifier_llm = conversation.llm_registry.get(usage_id) + except KeyError: + try: + loaded = conversation._profile_store.load( + meta.classifier_model, cipher=conversation._cipher + ) + except (FileNotFoundError, ValueError) as exc: + return ClassifyAndSwitchLLMObservation.from_text( + text=( + f"Failed to load classifier profile " + f"'{meta.classifier_model}': {exc}" + ), + is_error=True, + ) + classifier_llm = loaded.model_copy(update={"usage_id": usage_id}) + conversation.llm_registry.add(classifier_llm) # 2) Single classifier call over the recent conversation. transcript = _recent_messages_text(conversation) or "(no messages yet)" diff --git a/tests/agent_server/test_meta_profiles_router.py b/tests/agent_server/test_meta_profiles_router.py index 641a0a4a7c..9a76d411c4 100644 --- a/tests/agent_server/test_meta_profiles_router.py +++ b/tests/agent_server/test_meta_profiles_router.py @@ -132,6 +132,18 @@ def test_save_invalid_name_returns_422(client): assert response.status_code == 422 +def test_save_timeout_returns_503(client, monkeypatch): + """Save surfaces store lock TimeoutError as a retryable 503, not a 500.""" + + def boom(self, name, meta_profile, *, max_profiles=None): + raise TimeoutError("locked") + + monkeypatch.setattr(MetaProfileStore, "save", boom) + + response = client.post("/api/meta-profiles/anything", json=_meta()) + assert response.status_code == 503 + + # ── Delete ─────────────────────────────────────────────────────────────────── @@ -147,6 +159,18 @@ def test_delete_is_idempotent(client): assert response.status_code == 200 +def test_delete_timeout_returns_503(client, monkeypatch): + """Delete surfaces store lock TimeoutError as a retryable 503, not a 500.""" + + def boom(self, name): + raise TimeoutError("locked") + + monkeypatch.setattr(MetaProfileStore, "delete", boom) + + response = client.delete("/api/meta-profiles/anything") + assert response.status_code == 503 + + def test_delete_active_clears_active(client, store): store.save("p", MetaProfile.model_validate(_meta())) activate = client.post("/api/meta-profiles/p/activate") diff --git a/tests/sdk/tool/test_classify_and_switch_llm.py b/tests/sdk/tool/test_classify_and_switch_llm.py index b28a6e94f5..1933d409d7 100644 --- a/tests/sdk/tool/test_classify_and_switch_llm.py +++ b/tests/sdk/tool/test_classify_and_switch_llm.py @@ -224,6 +224,50 @@ def test_executor_switches_to_matched_class( assert conversation.agent.llm.model == "fast-model" +def test_classifier_call_is_accounted_in_conversation_stats( + profile_store, meta_store, monkeypatch +) -> None: + """The classifier completion must spend through the conversation registry. + + Regression for the budget-bypass bug: the classifier LLM has to be + registered in ``conversation.llm_registry`` (under a stable usage id) so its + tokens/cost land in ``conversation_stats`` and count against + ``max_budget_per_run`` — an unregistered LLM would spend off the books. + """ + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + usage_id = "classifier:classifier" + assert usage_id not in conversation.conversation_stats.usage_to_metrics + + obs = conversation.execute_tool( + "classify_and_switch_llm", ClassifyAndSwitchLLMAction() + ) + + assert isinstance(obs, ClassifyAndSwitchLLMObservation) + assert not obs.is_error + # The classifier LLM is now both registered and tracked by the stats, so + # its spend is included in the combined (budget-enforced) metrics. + assert usage_id in conversation.llm_registry.list_usage_ids() + assert usage_id in conversation.conversation_stats.usage_to_metrics + + +def test_repeated_routing_reuses_one_classifier_usage_bucket( + profile_store, meta_store, monkeypatch +) -> None: + """A second routing call must reuse the cached classifier, not re-register.""" + conversation = _make_conversation() + _register_tool(conversation, meta_store) + _patch_classifier(conversation, monkeypatch, reply="1") + + conversation.execute_tool("classify_and_switch_llm", ClassifyAndSwitchLLMAction()) + conversation.execute_tool("classify_and_switch_llm", ClassifyAndSwitchLLMAction()) + + usage_ids = conversation.llm_registry.list_usage_ids() + assert usage_ids.count("classifier:classifier") == 1 + + def test_executor_falls_back_to_default_when_no_class( profile_store, meta_store, monkeypatch ) -> None: From 99ec8779467303c0dc549caaffec40e409fb2c6c Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Jun 2026 17:38:34 -0300 Subject: [PATCH 8/8] fix: keep active_meta_profile a strict facade for routing-capable agents Addresses review feedback on PR #3744: activating a meta-profile set the top-level active_meta_profile even when agent_settings was a non-routing variant (e.g. ACPAgentSettings), which never attaches ClassifyAndSwitchLLMTool and lacks the active_meta_profile / enable_classify_and_switch_llm_tool fields. The persisted state could then claim an active router that no conversation could use, and switching agent kinds while a meta-profile was active left the facade stale. The top-level field is now treated as a strict facade for the nested state: - _apply_active_meta_profile() drops the request and clears the facade when the current agent variant cannot support routing; it mirrors into agent_settings only for routing-capable variants. - PersistedSettings.update() reconciles after applying an agent_settings diff, so switching to a non-routing variant clears the stale facade even when the payload does not touch active_meta_profile. Tests: activation on an ACP agent leaves the facade clear; switching to ACP while active clears it; switching back to OpenHands can re-activate. Co-authored-by: openhands --- .../agent_server/persistence/models.py | 49 +++++++++++++++---- .../agent_server/test_active_meta_profile.py | 47 +++++++++++++++++- 2 files changed, 86 insertions(+), 10 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/persistence/models.py b/openhands-agent-server/openhands/agent_server/persistence/models.py index 620872b49c..2201c48051 100644 --- a/openhands-agent-server/openhands/agent_server/persistence/models.py +++ b/openhands-agent-server/openhands/agent_server/persistence/models.py @@ -258,27 +258,58 @@ def update(self, payload: SettingsUpdatePayload) -> None: # Update active_meta_profile if explicitly provided (incl. None) if "active_meta_profile" in payload: self._apply_active_meta_profile(payload["active_meta_profile"]) + + # Enforce the invariant even when only ``agent_settings`` changed: + # switching to an agent variant that cannot attach the routing tool + # (e.g. ACP) must not leave a stale top-level ``active_meta_profile`` + # claiming routing is active. (No-op when active already matches.) + self._clear_active_meta_profile_if_unsupported() finally: # Clear conv_merged to minimize plaintext exposure window if conv_merged is not None: conv_merged.clear() + def _agent_supports_routing(self) -> bool: + """Whether the current agent variant can attach the routing tool. + + OpenHands agent settings expose ``active_meta_profile`` / + ``enable_classify_and_switch_llm_tool``; ACP (and any other) variants do + not and never attach :class:`ClassifyAndSwitchLLMTool`. + """ + return "active_meta_profile" in type(self.agent_settings).model_fields + def _apply_active_meta_profile(self, name: str | None) -> None: """Set ``active_meta_profile`` and propagate it into agent_settings. Propagating into the nested ``agent_settings`` is what enables/uses the routing tool on the agent built from these settings (mirrors how - activating a profile bakes the LLM into ``agent_settings``). ACP agent - settings lack these fields, so guard on their presence. + activating a profile bakes the LLM into ``agent_settings``). + + The top-level field is a strict facade for the nested state, not a + best-effort hint: if the current agent variant cannot attach the routing + tool (e.g. ACP), the request is dropped and the facade cleared so + persisted state never claims an active router no conversation can use. """ + if not self._agent_supports_routing(): + self.active_meta_profile = None + return self.active_meta_profile = name - if "active_meta_profile" in type(self.agent_settings).model_fields: - self.agent_settings = self.agent_settings.model_copy( - update={ - "active_meta_profile": name, - "enable_classify_and_switch_llm_tool": name is not None, - } - ) + self.agent_settings = self.agent_settings.model_copy( + update={ + "active_meta_profile": name, + "enable_classify_and_switch_llm_tool": name is not None, + } + ) + + def _clear_active_meta_profile_if_unsupported(self) -> None: + """Clear the facade when the agent variant cannot support routing. + + Guards the agent-kind-switch path: changing ``agent_settings`` to a + non-routing variant (e.g. ACP) while a meta-profile is active would + otherwise leave the top-level ``active_meta_profile`` stale. + """ + if not self._agent_supports_routing() and self.active_meta_profile is not None: + self.active_meta_profile = None @classmethod def from_persisted( diff --git a/tests/agent_server/test_active_meta_profile.py b/tests/agent_server/test_active_meta_profile.py index 3631cc996c..6af08a9ed5 100644 --- a/tests/agent_server/test_active_meta_profile.py +++ b/tests/agent_server/test_active_meta_profile.py @@ -1,7 +1,7 @@ """Tests for active_meta_profile persistence and propagation (Option A).""" from openhands.agent_server.persistence.models import PersistedSettings -from openhands.sdk.settings.model import OpenHandsAgentSettings +from openhands.sdk.settings.model import ACPAgentSettings, OpenHandsAgentSettings def test_update_sets_top_level_and_propagates_into_agent_settings() -> None: @@ -43,6 +43,51 @@ def test_update_without_active_meta_profile_leaves_it_unchanged() -> None: assert agent.active_meta_profile == "balanced" +def test_activate_on_acp_agent_does_not_set_facade() -> None: + """ACP agents cannot attach the routing tool, so the facade must stay clear. + + Otherwise ``GET /api/meta-profiles`` would report an active router that no + conversation built from the persisted ACP settings can actually use. + """ + settings = PersistedSettings(agent_settings=ACPAgentSettings()) + + settings.update({"active_meta_profile": "balanced"}) + + # Request dropped: the facade is not set and the variant is untouched. + assert settings.active_meta_profile is None + assert isinstance(settings.agent_settings, ACPAgentSettings) + + +def test_switching_to_acp_clears_active_meta_profile() -> None: + """Switching agent kind to ACP while a meta-profile is active clears it.""" + settings = PersistedSettings() + settings.update({"active_meta_profile": "balanced"}) + assert settings.active_meta_profile == "balanced" + + # Switch agent kind to ACP via an agent-settings diff (no meta-profile key). + settings.update({"agent_settings_diff": {"agent_kind": "acp"}}) + + assert settings.active_meta_profile is None + assert isinstance(settings.agent_settings, ACPAgentSettings) + + +def test_switching_back_to_openhands_can_reactivate() -> None: + """After an ACP round-trip, an OpenHands agent can activate routing again.""" + settings = PersistedSettings(agent_settings=ACPAgentSettings()) + settings.update({"active_meta_profile": "balanced"}) + assert settings.active_meta_profile is None + + # Switch back to an OpenHands agent and activate. + settings.update({"agent_settings_diff": {"agent_kind": "openhands"}}) + settings.update({"active_meta_profile": "balanced"}) + + assert settings.active_meta_profile == "balanced" + agent = settings.agent_settings + assert isinstance(agent, OpenHandsAgentSettings) + assert agent.active_meta_profile == "balanced" + assert agent.enable_classify_and_switch_llm_tool is True + + def test_active_meta_profile_round_trips_through_serialization() -> None: settings = PersistedSettings() settings.update({"active_meta_profile": "balanced"})