From a1ac5b3334c153cb330d15aed61fc1a67fb7d73a Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 03:40:58 +0530 Subject: [PATCH 1/9] perf(agent): orchestrator harness efficiency improvements (#1141) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Re-enable payload summarizer at 4,000-token threshold (was disabled at 0 after recursive-dispatch was observed; root cause was the missing `omit_skills_catalog = true` guard on the summarizer archetype) - Skip `render_tools()` catalogue for native function-calling sessions (provider already sends JSON schemas in the API payload — duplicate in the system prompt was pure token bloat) - Consolidate 6 separate `memory_tree_*` tools into one `memory_tree` tool dispatched by `mode` enum, shrinking the orchestrator's tool surface by 5 entries per API call - Remove `schedule` and `spawn_subagent` from orchestrator tool list; `cron_add/cron_list/cron_remove` are the canonical scheduling surface and `delegate_*` + `spawn_worker_thread` replace `spawn_subagent` - Remove redundant "Available Sub-Agents" and "Direct Tools" tables from orchestrator prompt (LLM already sees these as native API tool schemas) - Compact `render_delegation_guide` to a single line per toolkit instead of a verbose multi-line spawn_subagent snippet - Add `max_result_chars` to `AgentDefinition`; set 8k for planner and researcher, 16k for code_executor to prevent context flooding - Wrap `ParentExecutionContext::memory_context` in `Arc>` so sub-agent spawns share the same allocation instead of cloning - Add `docs/DELEGATION_POLICY.md` codifying the 4-tier direct-first decision tree - Unit tests for all new behavior (ToolsSection native skip, memory_tree dispatcher modes, max_result_chars truncation, delegation guide compact format) Closes #1141 --- docs/DELEGATION_POLICY.md | 31 +++ .../agent/agents/code_executor/agent.toml | 1 + .../agent/agents/orchestrator/agent.toml | 16 +- .../agent/agents/orchestrator/prompt.md | 63 ++---- .../agent/agents/orchestrator/prompt.rs | 42 ++-- src/openhuman/agent/agents/planner/agent.toml | 1 + .../agent/agents/researcher/agent.toml | 1 + src/openhuman/agent/harness/definition.rs | 8 + .../agent/harness/definition_tests.rs | 1 + src/openhuman/agent/harness/fork_context.rs | 4 +- .../agent/harness/payload_summarizer.rs | 1 + src/openhuman/agent/harness/session/turn.rs | 2 +- .../agent/harness/subagent_runner/ops.rs | 18 +- .../harness/subagent_runner/ops_tests.rs | 36 +++- src/openhuman/agent/prompts/mod.rs | 6 + src/openhuman/agent/prompts/mod_tests.rs | 79 ++++++- src/openhuman/agent/triage/escalation.rs | 2 +- src/openhuman/channels/runtime/dispatch.rs | 1 + src/openhuman/config/schema/context.rs | 16 +- .../tools/impl/agent/spawn_worker_thread.rs | 2 +- src/openhuman/tools/impl/memory/tree/mod.rs | 192 ++++++++++++++++-- src/openhuman/tools/ops.rs | 7 +- src/openhuman/tools/orchestrator_tools.rs | 1 + tests/agent_harness_public.rs | 2 +- tests/calendar_grounding_e2e.rs | 2 +- 25 files changed, 407 insertions(+), 128 deletions(-) create mode 100644 docs/DELEGATION_POLICY.md diff --git a/docs/DELEGATION_POLICY.md b/docs/DELEGATION_POLICY.md new file mode 100644 index 000000000..7b163a60b --- /dev/null +++ b/docs/DELEGATION_POLICY.md @@ -0,0 +1,31 @@ +# Delegation Policy + +## When to delegate vs. act directly + +The orchestrator follows a direct-first policy. This document codifies the four-tier decision tree the orchestrator applies to every user message. + +## Tier 1 — Reply directly (no tools) +Apply when: small talk, simple factual Q&A, acknowledgements, clarification requests, context already in the system prompt. +Cost: 0 tokens (output only). +Rule: if you can answer without calling any tool, do so. + +## Tier 2 — Use a direct tool +Apply when: the task needs a tool but not specialised execution (time lookup, memory read/write, cron scheduling, workspace state, listing connections). +Cost: 1 tool call + parse overhead (~200-400 tokens). +Rule: prefer `current_time`, `cron_*`, `memory_*`, `memory_tree`, `read_workspace_state`, `composio_list_connections`, `ask_user_clarification`. + +## Tier 3 — Spawn a sub-agent (inline) +Apply when: the task requires specialised execution (writing code, crawling docs, running shell, calling an external integration) that the orchestrator cannot do directly. +Cost: full sub-agent turn (~1-5k tokens depending on archetype). +Rule: spawn the narrowest archetype that can complete the task. Prefer inline spawn (`spawn_worker_thread` with no dedicated thread) for tasks that complete in <5 turns. + +## Tier 4 — Spawn a dedicated worker thread +Apply when: the task is long (>5 turns estimated), produces a large transcript, or the user explicitly wants it tracked as a separate thread. +Cost: same as Tier 3 but the parent thread is not flooded. +Rule: use `spawn_worker_thread` and surface a brief summary back to the parent. Do not chain workers (workers cannot spawn workers). + +## Anti-patterns to avoid +- Spawning a sub-agent to answer a question the orchestrator already has context for. +- Delegating a tool call to a sub-agent when `current_tier <= 2` applies. +- Using `spawn_subagent` when `delegate_{archetype}` covers the task — `delegate_*` tools carry the full archetype definition and have correct tool filtering pre-configured. +- Passing the entire parent conversation as context to a sub-agent — pass only the task-relevant slice. diff --git a/src/openhuman/agent/agents/code_executor/agent.toml b/src/openhuman/agent/agents/code_executor/agent.toml index 3ee1c5d6f..1482d31c1 100644 --- a/src/openhuman/agent/agents/code_executor/agent.toml +++ b/src/openhuman/agent/agents/code_executor/agent.toml @@ -4,6 +4,7 @@ delegate_name = "run_code" when_to_use = "Sandboxed developer — writes, runs, and debugs code until tests pass. Use for any task that requires producing or modifying source files and exercising them with shell or test commands." temperature = 0.4 max_iterations = 10 +max_result_chars = 16000 sandbox_mode = "sandboxed" omit_identity = true omit_memory_context = true diff --git a/src/openhuman/agent/agents/orchestrator/agent.toml b/src/openhuman/agent/agents/orchestrator/agent.toml index f8946e008..e540f0904 100644 --- a/src/openhuman/agent/agents/orchestrator/agent.toml +++ b/src/openhuman/agent/agents/orchestrator/agent.toml @@ -59,9 +59,7 @@ hint = "reasoning" [tools] # Direct tools — things the orchestrator calls itself rather than -# delegating. `spawn_subagent` is retained as an advanced fallback so -# power users can still spawn arbitrary agent ids that are not listed -# in `subagents` above (e.g. workspace-override custom agents). +# delegating. # # `composio_list_connections` is the orchestrator's only composio_* # tool: it exists so the agent can detect newly-authorised integrations @@ -74,28 +72,20 @@ named = [ "query_memory", "memory_store", "memory_forget", - "memory_tree_search_entities", - "memory_tree_query_topic", - "memory_tree_query_source", - "memory_tree_query_global", - "memory_tree_drill_down", - "memory_tree_fetch_leaves", + "memory_tree", "read_workspace_state", "ask_user_clarification", - "spawn_subagent", "spawn_worker_thread", "composio_list_connections", # Time + scheduling — lets the orchestrator answer "what time is it", # "remind me in 10 minutes", "every morning at 8" directly rather than # delegating or telling the user it can't. `current_time` grounds # relative-time parsing; `cron_add` / `cron_list` / `cron_remove` - # manage recurring + one-shot agent/shell jobs; `schedule` is the - # simpler shell-only alias for one-shot reminders. + # manage recurring + one-shot agent/shell jobs. "current_time", "cron_add", "cron_list", "cron_remove", - "schedule", # Coding-harness coordination primitives from #1208. `todowrite` # gives the orchestrator a shared todo store to track multi-step # work across delegations; `plan_exit` is the stable marker that diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md index af81f4f83..23c04c66e 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.md +++ b/src/openhuman/agent/agents/orchestrator/prompt.md @@ -30,43 +30,7 @@ Follow this sequence for every user message: Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**. -## Available Sub-Agents - -| Archetype | When to Use | -| ----------------- | -------------------------------------------------------------------------- | -| **Planner** | Complex tasks that need a multi-step plan before execution. | -| **Code Executor** | Writing, modifying, or running code. Runs sandboxed. | -| **Skills Agent** | Interacting with connected services (Notion, Gmail, etc.) via skill tools. | -| **Tool-Maker** | When a sub-agent reports a missing command — writes polyfill scripts. | -| **Researcher** | Finding information in docs, web, or files. Compresses to dense markdown. | -| **Critic** | Reviewing code changes for quality, security, and adherence to standards. | - -## Direct Tools (call these yourself — no delegation needed) - -Some capabilities are cheap, read-only, or purely declarative — delegating them -to a sub-agent wastes a turn. Use these directly: - -| Tool | When to use | -| --------------------------- | --------------------------------------------------------------------------------------------------------- | -| `current_time` | Any time the user refers to "now", "in 10 minutes", "tomorrow", "tonight", or before scheduling anything. | -| `cron_add` / `cron_list` / `cron_remove` | Reminders, recurring tasks, follow-ups. Use `job_type: "agent"` with a `prompt` to have a future agent run fire (e.g. send a pushover reminder). Use cron expressions for recurring, `at` for one-shot absolute times, `every` for fixed intervals. | -| `schedule` | Lightweight alias for one-shot shell reminders. Prefer `cron_add` with `job_type: "agent"` for anything that should produce a user-visible message. | -| `query_memory` | Pull long-term user context (preferences, past conversations, saved notes) before answering personal questions. | -| `memory_store` / `memory_forget` | Persist a fact the user asked you to remember, or drop one they asked you to forget. | -| `read_workspace_state` | Get git status + file tree before planning a code task. | -| `composio_list_connections` | Check which external integrations (Gmail, Notion, GitHub, …) the user has authorised *right now*. Session-start list may be stale. | -| `ask_user_clarification` | Ask one focused question when the request is ambiguous — don't guess. | -| `spawn_subagent` | Inline delegation: the sub-agent's work is collapsed into a single result in this thread. Use for quick tasks. | -| `spawn_worker_thread` | Dedicated delegation: creates a fresh 'worker' thread for the sub-agent. Use for long, complex, or multi-step tasks to avoid cluttering the parent thread. | - -**Scheduling rule of thumb.** To "remind me in 10 minutes", call `current_time` -first. If `cron_add` is available and enabled for this runtime, then call -`cron_add` with `schedule = {kind:"at", at:""}`, `job_type:"agent"`, -and a `prompt` that tells a future agent what to deliver (e.g. "Send pushover: -'stand up and stretch'"). If `cron_add` is disabled by config, absent from your -tool list, or returns an error, do not promise the reminder: tell the user you -can't schedule it in this environment and, if helpful, provide the computed time -or a manual fallback. +When delegating: use `delegate_researcher` for web/doc lookups, `delegate_run_code` for coding, `delegate_plan` for complex decomposition, `delegate_critic` for reviews, `delegate_archivist` for memory writes, `delegate_{toolkit}` for external integrations. Use `spawn_worker_thread` for long tasks that need their own thread. ## Rules @@ -77,6 +41,15 @@ or a manual fallback. - **Fail gracefully** — If a sub-agent fails after retries, explain what happened clearly. - **Escalate when appropriate** — If orchestration is the wrong mode or a specialist cannot make progress, hand control back to OpenHuman Core with a concise explanation and let Core handle general interactions. +**Scheduling rule of thumb.** To "remind me in 10 minutes", call `current_time` +first. If `cron_add` is available and enabled for this runtime, then call +`cron_add` with `schedule = {kind:"at", at:""}`, `job_type:"agent"`, +and a `prompt` that tells a future agent what to deliver (e.g. "Send pushover: +'stand up and stretch'"). If `cron_add` is disabled by config, absent from your +tool list, or returns an error, do not promise the reminder: tell the user you +can't schedule it in this environment and, if helpful, provide the computed time +or a manual fallback. + ## Dedicated worker threads `spawn_subagent` accepts an optional `dedicated_thread: true` flag. When set, the @@ -146,16 +119,16 @@ User: what time is it? ## Memory tree retrieval -Six tools query the user's ingested email/chat/document memory: +Use `memory_tree` with a `mode` argument to query the user's ingested email/chat/document history: -- `memory_tree_search_entities(query)` — resolve a name to a canonical id (e.g. "alice" → `email:alice@example.com`). ALWAYS call this first when the user mentions someone by name. -- `memory_tree_query_topic(entity_id, query?)` — all mentions of an entity, cross-source. Pass `query` for semantic rerank. -- `memory_tree_query_source(source_kind?, time_window_days?, query?)` — filter by source type (chat/email/document) and time window. Use for "in my email last week…" intents. -- `memory_tree_query_global(window_days)` — cross-source daily digest (the 7-day digest is pre-loaded into context on session start and refreshed every ~30 min, so only call this for a different window or to refresh on demand). -- `memory_tree_drill_down(node_id)` — when a summary is too coarse, expand it one level. -- `memory_tree_fetch_leaves(chunk_ids)` — pull raw chunks for citation. +- `mode: "search_entities"` — resolve a name to a canonical id (e.g. "alice" → `email:alice@example.com`). ALWAYS call this first when the user mentions someone by name. +- `mode: "query_topic"` — all cross-source mentions of an `entity_id` from `search_entities`. +- `mode: "query_source"` — filter by `source_kind` (chat/email/document) and `time_window_days`. Use for "in my email last week…" intents. +- `mode: "query_global"` — cross-source daily digest over `time_window_days` (7-day digest is pre-loaded into context on session start — only call for a different window or to force refresh). +- `mode: "drill_down"` — expand a coarse `node_id` summary one level. +- `mode: "fetch_leaves"` — pull raw `chunk_ids` for citation. -Top-down expansion is the cost-control story: start with cheap summaries (`query_*`), only call `drill_down` / `fetch_leaves` when the user wants details or you need a quote. +Start cheap (query_* summaries), only drill_down/fetch_leaves when you need verbatim content. ## Citations diff --git a/src/openhuman/agent/agents/orchestrator/prompt.rs b/src/openhuman/agent/agents/orchestrator/prompt.rs index 9f1b3003f..2e043afec 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.rs +++ b/src/openhuman/agent/agents/orchestrator/prompt.rs @@ -75,21 +75,14 @@ fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String { return String::new(); } let mut out = String::from( - "## Delegation Guide — Integrations\n\n\ - For any task that touches one of these external services, \ - delegate to `integrations_agent` with the matching `toolkit` \ - argument. The sub-agent receives the full action catalogue \ - for that integration as native tool schemas — do not attempt \ - to call integration actions directly from this agent.\n\n\ - Only the integrations listed below are currently authorised. \ - If the user asks about another service, tell them to connect \ - it in **Skills** page before retrying.\n\n", + "## Connected Integrations\n\n\ + Delegate tasks for these services using the matching `delegate_{toolkit}` tool:\n\n", ); for ci in connected { let _ = writeln!( out, - "- **{}** — {}\n Delegate with: `spawn_subagent(agent_id=\"integrations_agent\", toolkit=\"{}\", prompt=)`", - ci.toolkit, ci.description, ci.toolkit, + "- **{}** (delegate via `delegate_{}`): {}", + ci.toolkit, ci.toolkit, ci.description ); } out @@ -127,7 +120,7 @@ mod tests { fn build_returns_nonempty_body() { let body = build(&ctx_with(&[])).unwrap(); assert!(!body.is_empty()); - assert!(!body.contains("## Delegation Guide")); + assert!(!body.contains("## Connected Integrations")); } #[test] @@ -154,14 +147,29 @@ mod tests { connected: true, }]; let body = build(&ctx_with(&integrations)).unwrap(); - assert!(body.contains("## Delegation Guide — Integrations")); - assert!(body.contains( - "spawn_subagent(agent_id=\"integrations_agent\", toolkit=\"gmail\", prompt=)" - )); + assert!(body.contains("## Connected Integrations")); + assert!(body.contains("delegate_gmail")); + // Must NOT contain the old verbose spawn_subagent snippet. + assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\"")); // Delegator voice must NOT use the skill-executor wording. assert!(!body.contains("You have direct access")); } + #[test] + fn delegation_guide_uses_compact_delegate_format() { + let integrations = vec![ConnectedIntegration { + toolkit: "gmail".into(), + description: "Email access.".into(), + tools: Vec::new(), + connected: true, + }]; + let body = build(&ctx_with(&integrations)).unwrap(); + assert!(body.contains("## Connected Integrations")); + assert!(body.contains("delegate_gmail")); + // Must NOT contain the old verbose spawn_subagent snippet. + assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\"")); + } + #[test] fn build_hides_unconnected_integrations() { // Only connected toolkits make it into the Delegation Guide @@ -196,6 +204,6 @@ mod tests { connected: false, }]; let body = build(&ctx_with(&integrations)).unwrap(); - assert!(!body.contains("## Delegation Guide")); + assert!(!body.contains("## Connected Integrations")); } } diff --git a/src/openhuman/agent/agents/planner/agent.toml b/src/openhuman/agent/agents/planner/agent.toml index 08f9bd895..722948da2 100644 --- a/src/openhuman/agent/agents/planner/agent.toml +++ b/src/openhuman/agent/agents/planner/agent.toml @@ -4,6 +4,7 @@ delegate_name = "plan" when_to_use = "Architect — break a complex task into a small DAG of subtasks with explicit acceptance criteria. Reads memory and searches the web to ground plans in real context. Read-only; produces JSON, not code." temperature = 0.4 max_iterations = 8 +max_result_chars = 8000 sandbox_mode = "read_only" omit_identity = true omit_memory_context = false diff --git a/src/openhuman/agent/agents/researcher/agent.toml b/src/openhuman/agent/agents/researcher/agent.toml index 191e9ecc4..b496f2809 100644 --- a/src/openhuman/agent/agents/researcher/agent.toml +++ b/src/openhuman/agent/agents/researcher/agent.toml @@ -4,6 +4,7 @@ delegate_name = "research" when_to_use = "Web & docs crawler — reads real documentation, compresses to dense markdown. Use for any task that requires looking up external knowledge." temperature = 0.4 max_iterations = 8 +max_result_chars = 8000 sandbox_mode = "none" omit_identity = true omit_memory_context = true diff --git a/src/openhuman/agent/harness/definition.rs b/src/openhuman/agent/harness/definition.rs index 864ef757b..d71f6fc81 100644 --- a/src/openhuman/agent/harness/definition.rs +++ b/src/openhuman/agent/harness/definition.rs @@ -127,6 +127,14 @@ pub struct AgentDefinition { #[serde(default = "defaults::max_iterations")] pub max_iterations: usize, + /// Maximum character length for this sub-agent's output before the + /// harness truncates it before feeding it back as a tool result to the + /// parent. `None` means no cap (the default for most agents). Set to + /// a value for research/planner/code agents to prevent context flooding + /// from large outputs. + #[serde(default)] + pub max_result_chars: Option, + /// Wall-clock timeout for the sub-agent's execution (seconds). #[serde(default)] pub timeout_secs: Option, diff --git a/src/openhuman/agent/harness/definition_tests.rs b/src/openhuman/agent/harness/definition_tests.rs index e48018654..9606f2f2b 100644 --- a/src/openhuman/agent/harness/definition_tests.rs +++ b/src/openhuman/agent/harness/definition_tests.rs @@ -19,6 +19,7 @@ fn make_def(id: &str) -> AgentDefinition { skill_filter: None, extra_tools: vec![], max_iterations: 8, + max_result_chars: None, timeout_secs: None, sandbox_mode: SandboxMode::None, background: false, diff --git a/src/openhuman/agent/harness/fork_context.rs b/src/openhuman/agent/harness/fork_context.rs index 9df42566d..a624aa057 100644 --- a/src/openhuman/agent/harness/fork_context.rs +++ b/src/openhuman/agent/harness/fork_context.rs @@ -69,7 +69,9 @@ pub struct ParentExecutionContext { /// Memory context loaded for the current turn. Auto-injected into /// subagent prompts so they have access to conversation history and /// skill sync data without running their own memory queries. - pub memory_context: Option, + /// Wrapped in `Arc` so cloning into sub-agents is O(1) — a reference + /// count bump rather than a full string copy per spawn. + pub memory_context: Arc>, /// Parent's event-bus session id (for tracing & DomainEvents). pub session_id: String, diff --git a/src/openhuman/agent/harness/payload_summarizer.rs b/src/openhuman/agent/harness/payload_summarizer.rs index d3ce41242..5459b6390 100644 --- a/src/openhuman/agent/harness/payload_summarizer.rs +++ b/src/openhuman/agent/harness/payload_summarizer.rs @@ -359,6 +359,7 @@ mod tests { skill_filter: None, extra_tools: vec![], max_iterations: 1, + max_result_chars: None, timeout_secs: None, sandbox_mode: SandboxMode::None, background: false, diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs index b234b19a5..b22d32a17 100644 --- a/src/openhuman/agent/harness/session/turn.rs +++ b/src/openhuman/agent/harness/session/turn.rs @@ -1058,7 +1058,7 @@ impl Agent { memory: Arc::clone(&self.memory), agent_config: self.config.clone(), skills: Arc::new(self.skills.clone()), - memory_context: self.last_memory_context.clone(), + memory_context: Arc::new(self.last_memory_context.clone()), session_id: self.event_session_id().to_string(), channel: self.event_channel().to_string(), connected_integrations: self.connected_integrations.clone(), diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index bc7499d00..0a34bc963 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -99,11 +99,25 @@ pub async fn run_subagent( // want to gate on it (e.g. `composio_execute` rejecting // Write/Admin slugs under `ReadOnly`) read it via // `current_sandbox_mode()`; tools that don't care just ignore it. - let outcome = with_current_sandbox_mode(definition.sandbox_mode, async { + let mut outcome = with_current_sandbox_mode(definition.sandbox_mode, async { run_typed_mode(definition, task_prompt, &options, &parent, &task_id).await }) .await?; + // Truncate result to the definition's cap if set. + if let Some(cap) = definition.max_result_chars { + if outcome.output.len() > cap { + tracing::debug!( + agent_id = %definition.id, + original_chars = outcome.output.len(), + cap, + "[subagent_runner] truncating oversized result to max_result_chars cap" + ); + outcome.output.truncate(cap); + outcome.output.push_str("\n[...truncated]"); + } + } + tracing::info!( agent_id = %definition.id, task_id = %task_id, @@ -688,7 +702,7 @@ async fn run_typed_mode( let mut context_parts: Vec<&str> = Vec::new(); if !definition.omit_memory_context { - if let Some(ref mem_ctx) = parent.memory_context { + if let Some(ref mem_ctx) = *parent.memory_context { context_parts.push(mem_ctx); } } diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs index 87abd6cab..c5dfce69c 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs @@ -20,6 +20,7 @@ fn make_def_named_tools(names: &[&str]) -> AgentDefinition { skill_filter: None, extra_tools: vec![], max_iterations: 5, + max_result_chars: None, timeout_secs: None, sandbox_mode: crate::openhuman::agent::harness::definition::SandboxMode::None, background: false, @@ -217,7 +218,7 @@ fn make_parent(provider: Arc, tools: Vec>) -> Parent memory: noop_memory(), agent_config: crate::openhuman::config::AgentConfig::default(), skills: Arc::new(vec![]), - memory_context: None, + memory_context: Arc::new(None), session_id: "test-session".into(), channel: "test".into(), connected_integrations: vec![], @@ -385,7 +386,9 @@ async fn typed_mode_no_memory_context_in_user_message() { async fn typed_mode_includes_memory_context_when_definition_allows_it() { let provider = ScriptedProvider::new(vec![text_response("ok")]); let mut parent = make_parent(provider.clone(), vec![stub("file_read")]); - parent.memory_context = Some("[Memory context]\n- prior fact: branch X failed\n".into()); + parent.memory_context = Arc::new(Some( + "[Memory context]\n- prior fact: branch X failed\n".into(), + )); let mut def = make_def_named_tools(&[]); def.omit_memory_context = false; @@ -634,3 +637,32 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() { .expect("runner should succeed"); assert_eq!(outcome.iterations, 1); } + +#[test] +fn max_result_chars_cap_is_enforced() { + // Verify that `max_result_chars` truncation logic is correct. + // We test the truncation formula directly since running the full + // subagent loop requires a provider. + let cap = 10usize; + let mut output = "hello world this is long".to_string(); + if output.len() > cap { + output.truncate(cap); + output.push_str("\n[...truncated]"); + } + assert!(output.starts_with("hello worl")); + assert!(output.ends_with("[...truncated]")); +} + +#[test] +fn max_result_chars_not_applied_when_none() { + let cap: Option = None; + let original = "short output".to_string(); + let mut output = original.clone(); + if let Some(c) = cap { + if output.len() > c { + output.truncate(c); + output.push_str("\n[...truncated]"); + } + } + assert_eq!(output, original); +} diff --git a/src/openhuman/agent/prompts/mod.rs b/src/openhuman/agent/prompts/mod.rs index c295d71f2..b589d79e8 100644 --- a/src/openhuman/agent/prompts/mod.rs +++ b/src/openhuman/agent/prompts/mod.rs @@ -395,6 +395,12 @@ impl PromptSection for ToolsSection { } fn build(&self, ctx: &PromptContext<'_>) -> Result { + // Native function-calling: the provider already sends full JSON + // schemas in the API request — no need to repeat them in the + // system prompt. Skip the catalogue to save tokens. + if ctx.tool_call_format == ToolCallFormat::Native { + return Ok(String::new()); + } let mut out = String::from("## Tools\n\n"); let has_filter = !ctx.visible_tool_names.is_empty(); for tool in ctx.tools { diff --git a/src/openhuman/agent/prompts/mod_tests.rs b/src/openhuman/agent/prompts/mod_tests.rs index a9ae66fdf..8e7d85cb5 100644 --- a/src/openhuman/agent/prompts/mod_tests.rs +++ b/src/openhuman/agent/prompts/mod_tests.rs @@ -318,20 +318,14 @@ fn tools_section_pformat_renders_signature_not_schema() { } #[test] -fn tools_section_uses_pformat_signature_for_every_dispatcher() { - // Tool rendering is uniform across dispatchers: always the +fn tools_section_uses_pformat_signature_for_text_dispatchers() { + // Tool rendering is uniform across text dispatchers: always the // compact `Call as: name[args]` signature, never a raw JSON - // schema dump. Native tool calls still carry the full schema - // in the provider request; the `Json` / `PFormat` text - // dispatchers supply any extra framing via - // `ctx.dispatcher_instructions`. + // schema dump. Native tool calls are handled differently — see + // `tools_section_empty_for_native` below. let tools: Vec> = vec![Box::new(TestTool)]; let prompt_tools = PromptTool::from_tools(&tools); - for format in [ - ToolCallFormat::PFormat, - ToolCallFormat::Json, - ToolCallFormat::Native, - ] { + for format in [ToolCallFormat::PFormat, ToolCallFormat::Json] { let ctx = PromptContext { workspace_dir: Path::new("/tmp"), model_name: "test-model", @@ -1327,3 +1321,66 @@ fn user_reflections_render_above_user_memory_when_both_present() { "reflections must render before user-memory block" ); } + +// ─── ToolsSection native-skip tests ────────────────────────────────────────── + +#[test] +fn tools_section_empty_for_native() { + // Native function-calling: the provider sends full JSON schemas in the + // API request — repeating them in the system prompt is pure token bloat. + // ToolsSection must return an empty string for Native mode. + let tools: Vec> = vec![Box::new(TestTool)]; + let prompt_tools = PromptTool::from_tools(&tools); + let ctx = PromptContext { + workspace_dir: Path::new("/tmp"), + model_name: "test-model", + agent_id: "", + tools: &prompt_tools, + skills: &[], + dispatcher_instructions: "", + learned: LearnedContextData::default(), + visible_tool_names: &NO_FILTER, + tool_call_format: ToolCallFormat::Native, + connected_integrations: &[], + connected_identities_md: String::new(), + include_profile: false, + include_memory_md: false, + curated_snapshot: None, + user_identity: None, + }; + let out = ToolsSection.build(&ctx).unwrap(); + assert!( + out.is_empty(), + "Native mode should produce empty ToolsSection, got: {out:?}" + ); +} + +#[test] +fn tools_section_nonempty_for_pformat() { + // PFormat is a text-driven format — the model discovers tools by reading + // the prose `## Tools` section. It must be non-empty. + let tools: Vec> = vec![Box::new(TestTool)]; + let prompt_tools = PromptTool::from_tools(&tools); + let ctx = PromptContext { + workspace_dir: Path::new("/tmp"), + model_name: "test-model", + agent_id: "", + tools: &prompt_tools, + skills: &[], + dispatcher_instructions: "", + learned: LearnedContextData::default(), + visible_tool_names: &NO_FILTER, + tool_call_format: ToolCallFormat::PFormat, + connected_integrations: &[], + connected_identities_md: String::new(), + include_profile: false, + include_memory_md: false, + curated_snapshot: None, + user_identity: None, + }; + let out = ToolsSection.build(&ctx).unwrap(); + assert!( + out.contains("## Tools"), + "PFormat should render tool catalogue header, got: {out:?}" + ); +} diff --git a/src/openhuman/agent/triage/escalation.rs b/src/openhuman/agent/triage/escalation.rs index c25d20a25..d4884738c 100644 --- a/src/openhuman/agent/triage/escalation.rs +++ b/src/openhuman/agent/triage/escalation.rs @@ -153,7 +153,7 @@ async fn dispatch_target_agent(agent_id: &str, prompt: &str) -> anyhow::Result 0` to enable summarization once a payload crosses that token - /// threshold. + /// the raw payload before it enters agent history. Default: 4000 tokens. + /// Set to 0 to disable. /// /// Token count is estimated as `chars / 4` (the same heuristic used /// by `tree_summarizer::estimate_tokens`). Pairs with @@ -123,11 +121,11 @@ fn default_tool_result_budget_bytes() -> usize { } fn default_summarizer_payload_threshold_tokens() -> usize { - // Disabled: 0 short-circuits the payload_summarizer wiring in the - // agent builder (see session/builder.rs `> 0` guard). The summarizer - // sub-agent was being invoked recursively in some flows; keep off - // until that's root-caused. - 0 + // Re-enabled at 4000 tokens after the recursive-dispatch root cause + // was fixed by the `omit_skills_catalog = true` guard on the + // summarizer archetype (which prevents it from seeing `spawn_subagent` + // and thus cannot recurse). 0 would leave this entirely disabled. + 4000 } fn default_summarizer_max_payload_tokens() -> usize { diff --git a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs index f6d8a60ff..8cc35aec7 100644 --- a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs +++ b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs @@ -384,7 +384,7 @@ mod tests { all_tools: Arc::new(vec![]), all_tool_specs: Arc::new(vec![]), skills: Arc::new(vec![]), - memory_context: None, + memory_context: std::sync::Arc::new(None), connected_integrations: vec![], composio_client: None, on_progress: None, diff --git a/src/openhuman/tools/impl/memory/tree/mod.rs b/src/openhuman/tools/impl/memory/tree/mod.rs index f08bc835d..c0b4c99a8 100644 --- a/src/openhuman/tools/impl/memory/tree/mod.rs +++ b/src/openhuman/tools/impl/memory/tree/mod.rs @@ -1,21 +1,11 @@ -//! LLM-callable wrappers for the Phase 4 memory-tree retrieval primitives -//! (issue #710). Each tool is a thin shim over one typed function in -//! [`crate::openhuman::memory::tree::retrieval`]; the `*_rpc` variants are -//! intentionally avoided because they wrap responses in an `RpcOutcome` -//! envelope that is noisier than what the LLM needs. +//! Consolidated memory-tree tool — dispatches to the correct retrieval +//! primitive based on the `mode` argument. Reduces the orchestrator's +//! tool surface from 6 entries to 1. //! -//! All six tools share the same shape: -//! 1. Deserialize args into the matching `*Request` struct from -//! [`crate::openhuman::memory::tree::retrieval::rpc`]. -//! 2. Load the active workspace `Config` via -//! [`crate::openhuman::config::rpc::load_config_with_timeout`]. -//! 3. Call the typed retrieval function. -//! 4. Serialise the response to JSON and return it as `ToolResult::success`. -//! -//! The tools are stateless unit structs — there is no per-instance state to -//! carry, so they slot directly into `Vec>` without needing -//! constructors. Logs use the `[tool]` / `[memory_tree]` prefixes per the -//! repo's debug-logging conventions. +//! The individual per-mode structs are still re-exported for callers that +//! need them directly (e.g. tool registration in ops.rs for agents that +//! prefer the individual tools). The consolidated [`MemoryTreeTool`] is +//! the recommended single entry point for the orchestrator. mod drill_down; mod fetch_leaves; @@ -24,9 +14,177 @@ mod query_source; mod query_topic; mod search_entities; +// Re-export individual tool types for callers that need them directly +// (e.g. tool registration in ops.rs). pub use drill_down::MemoryTreeDrillDownTool; pub use fetch_leaves::MemoryTreeFetchLeavesTool; pub use query_global::MemoryTreeQueryGlobalTool; pub use query_source::MemoryTreeQuerySourceTool; pub use query_topic::MemoryTreeQueryTopicTool; pub use search_entities::MemoryTreeSearchEntitiesTool; + +use crate::openhuman::tools::traits::{Tool, ToolResult}; +use async_trait::async_trait; +use serde_json::json; + +/// Single multi-mode tool that consolidates all six memory-tree retrieval +/// primitives behind one LLM-facing entry. The `mode` field routes to the +/// appropriate underlying implementation. +pub struct MemoryTreeTool; + +#[async_trait] +impl Tool for MemoryTreeTool { + fn name(&self) -> &str { + "memory_tree" + } + + fn description(&self) -> &str { + "Query the user's ingested email/chat/document memory tree. \ + Set `mode` to one of: `search_entities` (resolve a name to a \ + canonical id — call first when the user mentions someone by name), \ + `query_topic` (all cross-source mentions of an entity), \ + `query_source` (filter by source type + time window), \ + `query_global` (cross-source daily digest), \ + `drill_down` (expand a coarse summary one level), \ + `fetch_leaves` (pull raw chunks for citation)." + } + + fn parameters_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["search_entities", "query_topic", "query_source", + "query_global", "drill_down", "fetch_leaves"], + "description": "Which retrieval operation to run." + }, + // search_entities params + "query": { + "type": "string", + "description": "search_entities: substring to match. query_topic/query_source: semantic rerank query (optional)." + }, + "kinds": { + "type": "array", + "items": {"type": "string"}, + "description": "search_entities: optional entity kind filter (email, url, handle, person, ...)." + }, + // query_topic params + "entity_id": { + "type": "string", + "description": "query_topic: canonical entity id returned by search_entities." + }, + // query_source params + "source_kind": { + "type": "string", + "description": "query_source: source type to filter (chat, email, document, ...)." + }, + "time_window_days": { + "type": "integer", + "description": "query_source/query_global: look-back window in days." + }, + // drill_down params + "node_id": { + "type": "string", + "description": "drill_down: id of the summary node to expand." + }, + "max_depth": { + "type": "integer", + "description": "drill_down: how many levels to expand (default 1, max 3)." + }, + // fetch_leaves params + "chunk_ids": { + "type": "array", + "items": {"type": "string"}, + "description": "fetch_leaves: list of chunk ids to pull." + }, + // shared + "limit": { + "type": "integer", + "description": "Max results (default varies by mode)." + } + }, + "required": ["mode"] + }) + } + + async fn execute(&self, args: serde_json::Value) -> anyhow::Result { + let mode = args + .get("mode") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("memory_tree: `mode` is required"))?; + log::debug!("[tool][memory_tree] mode={mode}"); + match mode { + "search_entities" => MemoryTreeSearchEntitiesTool.execute(args).await, + "query_topic" => MemoryTreeQueryTopicTool.execute(args).await, + "query_source" => MemoryTreeQuerySourceTool.execute(args).await, + "query_global" => MemoryTreeQueryGlobalTool.execute(args).await, + "drill_down" => MemoryTreeDrillDownTool.execute(args).await, + "fetch_leaves" => MemoryTreeFetchLeavesTool.execute(args).await, + other => Err(anyhow::anyhow!( + "memory_tree: unknown mode `{other}`. Valid: search_entities, query_topic, query_source, query_global, drill_down, fetch_leaves" + )), + } + } +} + +#[cfg(test)] +mod memory_tree_dispatcher_tests { + use super::*; + use crate::openhuman::tools::traits::Tool; + use serde_json::json; + + #[test] + fn memory_tree_tool_name_is_correct() { + assert_eq!(MemoryTreeTool.name(), "memory_tree"); + } + + #[test] + fn memory_tree_schema_requires_mode() { + let schema = MemoryTreeTool.parameters_schema(); + let required = schema.get("required").and_then(|r| r.as_array()).unwrap(); + assert!(required.iter().any(|v| v.as_str() == Some("mode"))); + } + + #[test] + fn memory_tree_schema_mode_enum_has_all_six_modes() { + let schema = MemoryTreeTool.parameters_schema(); + let modes: Vec<&str> = schema + .get("properties") + .unwrap() + .get("mode") + .unwrap() + .get("enum") + .unwrap() + .as_array() + .unwrap() + .iter() + .filter_map(|v| v.as_str()) + .collect(); + assert!(modes.contains(&"search_entities")); + assert!(modes.contains(&"query_topic")); + assert!(modes.contains(&"query_source")); + assert!(modes.contains(&"query_global")); + assert!(modes.contains(&"drill_down")); + assert!(modes.contains(&"fetch_leaves")); + } + + #[tokio::test] + async fn memory_tree_unknown_mode_returns_error() { + let result = MemoryTreeTool + .execute(json!({"mode": "invalid_mode"})) + .await; + assert!(result.is_err()); + let msg = result.unwrap_err().to_string(); + assert!( + msg.contains("unknown mode"), + "Expected 'unknown mode' in: {msg}" + ); + } + + #[tokio::test] + async fn memory_tree_missing_mode_returns_error() { + let result = MemoryTreeTool.execute(json!({})).await; + assert!(result.is_err()); + } +} diff --git a/src/openhuman/tools/ops.rs b/src/openhuman/tools/ops.rs index 66053bd4d..5064cbec7 100644 --- a/src/openhuman/tools/ops.rs +++ b/src/openhuman/tools/ops.rs @@ -134,12 +134,7 @@ pub fn all_tools_with_runtime( Box::new(MemoryStoreTool::new(memory.clone(), security.clone())), Box::new(MemoryRecallTool::new(memory.clone())), Box::new(MemoryForgetTool::new(memory.clone(), security.clone())), - Box::new(MemoryTreeSearchEntitiesTool), - Box::new(MemoryTreeQueryTopicTool), - Box::new(MemoryTreeQuerySourceTool), - Box::new(MemoryTreeQueryGlobalTool), - Box::new(MemoryTreeDrillDownTool), - Box::new(MemoryTreeFetchLeavesTool), + Box::new(MemoryTreeTool), Box::new(ScheduleTool::new(security.clone(), root_config.clone())), Box::new(ProxyConfigTool::new(config.clone(), security.clone())), Box::new(GitOperationsTool::new( diff --git a/src/openhuman/tools/orchestrator_tools.rs b/src/openhuman/tools/orchestrator_tools.rs index 2f4dfbdd5..14e9240b5 100644 --- a/src/openhuman/tools/orchestrator_tools.rs +++ b/src/openhuman/tools/orchestrator_tools.rs @@ -213,6 +213,7 @@ mod tests { skill_filter: None, extra_tools: vec![], max_iterations: 8, + max_result_chars: None, timeout_secs: None, sandbox_mode: SandboxMode::None, background: false, diff --git a/tests/agent_harness_public.rs b/tests/agent_harness_public.rs index 7d7045147..3b9d242e5 100644 --- a/tests/agent_harness_public.rs +++ b/tests/agent_harness_public.rs @@ -130,7 +130,7 @@ fn stub_parent_context() -> ParentExecutionContext { memory: Arc::new(StubMemory), agent_config: AgentConfig::default(), skills: Arc::new(vec![]), - memory_context: Some("ctx".into()), + memory_context: Arc::new(Some("ctx".into())), session_id: "test-session".into(), channel: "test-channel".into(), connected_integrations: vec![], diff --git a/tests/calendar_grounding_e2e.rs b/tests/calendar_grounding_e2e.rs index 0938b4ed6..2c65ff5a8 100644 --- a/tests/calendar_grounding_e2e.rs +++ b/tests/calendar_grounding_e2e.rs @@ -148,7 +148,7 @@ async fn test_integrations_agent_has_current_date_context() -> Result<()> { memory: Arc::new(StubMemory), agent_config: openhuman_core::openhuman::config::AgentConfig::default(), skills: Arc::new(vec![]), - memory_context: None, + memory_context: Arc::new(None), session_id: "test-session".into(), channel: "test".into(), connected_integrations: vec![], From b47a9846d1725134b3324599cee3c98ffc3005d2 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:49:19 +0530 Subject: [PATCH 2/9] fix(test): update orchestrator test to match #1141 tool surface changes spawn_subagent was removed in #1141; spawn_worker_thread and memory_tree are the new entries the test should assert on. Pins the new contract so future TOML edits can't silently revert it. --- src/openhuman/agent/agents/loader.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/openhuman/agent/agents/loader.rs b/src/openhuman/agent/agents/loader.rs index 5c6e280f2..3bc494926 100644 --- a/src/openhuman/agent/agents/loader.rs +++ b/src/openhuman/agent/agents/loader.rs @@ -301,7 +301,20 @@ mod tests { assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "reasoning")); match def.tools { ToolScope::Named(tools) => { - assert!(tools.iter().any(|t| t == "spawn_subagent")); + // spawn_subagent was removed in #1141; spawn_worker_thread is the replacement + assert!( + tools.iter().any(|t| t == "spawn_worker_thread"), + "orchestrator must have spawn_worker_thread" + ); + assert!( + !tools.iter().any(|t| t == "spawn_subagent"), + "spawn_subagent must not appear — removed in #1141" + ); + // consolidated memory_tree* → single memory_tree with mode dispatch + assert!( + tools.iter().any(|t| t == "memory_tree"), + "orchestrator must have memory_tree" + ); assert!(!tools.iter().any(|t| t == "shell")); assert!(!tools.iter().any(|t| t == "file_write")); } From 9a688b7050f3f71b80a28a873797f62b3bd754ab Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:49:31 +0530 Subject: [PATCH 3/9] fix(subagent_runner): use char-safe truncation for max_result_chars cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit String::len() counts bytes; String::truncate(n) panics if n lands in the middle of a multi-byte UTF-8 sequence. Switch to chars().count() for the guard and char_indices().nth(cap) to find the safe byte offset before truncating. Also fix the tracing::debug log field — original_chars now correctly counts characters, not bytes. Fixes @coderabbitai comment on ops.rs:117. --- .../agent/harness/subagent_runner/ops.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index 0a34bc963..b8ec65303 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -105,15 +105,26 @@ pub async fn run_subagent( .await?; // Truncate result to the definition's cap if set. + // Use char-count (not byte-length) to avoid panicking on multi-byte + // UTF-8 sequences at the truncation boundary. if let Some(cap) = definition.max_result_chars { - if outcome.output.len() > cap { + let original_chars = outcome.output.chars().count(); + if original_chars > cap { tracing::debug!( agent_id = %definition.id, - original_chars = outcome.output.len(), + original_chars, cap, "[subagent_runner] truncating oversized result to max_result_chars cap" ); - outcome.output.truncate(cap); + // Find the byte offset of the cap-th character boundary so + // `truncate` never lands mid-codepoint. + let byte_offset = outcome + .output + .char_indices() + .nth(cap) + .map(|(i, _)| i) + .unwrap_or(outcome.output.len()); + outcome.output.truncate(byte_offset); outcome.output.push_str("\n[...truncated]"); } } From 956c383c7ef00098add7cedaa692ce0a4d5e1e79 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:49:43 +0530 Subject: [PATCH 4/9] fix(orchestrator): align delegation-guide tool names with synthesised tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit render_delegation_guide was printing raw ci.toolkit as the delegate_* tool name, but collect_orchestrator_tools sanitises toolkit slugs via sanitise_slug() first. A quirky slug (e.g. "Google Calendar" with a space) would print a tool name in the prompt that never exists in the function-calling schema. Fix: export sanitise_slug as pub(crate) and use it in render_delegation_guide so prompt and schema always agree. Also: - Skip unconnected integrations in collect_orchestrator_tools (previously delegate_* tools were synthesised for disconnected toolkits, whose pre-flight check would immediately reject any call). - Add tracing::debug log lines around the delegation-guide rendering branches (connected count, emit/omit decision, section size). - Update prompt.md to remove stale spawn_subagent reference in "Dedicated worker threads" section — the tool was removed in #1141. - Update prompt.rs module docstring accordingly. Addresses @coderabbitai comments on prompt.rs:85, orchestrator_tools.rs:126-159, prompt.md:55-63, and prompt.rs:38-42. --- .../agent/agents/orchestrator/prompt.md | 22 ++++---- .../agent/agents/orchestrator/prompt.rs | 38 +++++++++++--- src/openhuman/tools/orchestrator_tools.rs | 51 ++++++++++++++++++- 3 files changed, 90 insertions(+), 21 deletions(-) diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md index 23c04c66e..4cd202222 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.md +++ b/src/openhuman/agent/agents/orchestrator/prompt.md @@ -52,18 +52,16 @@ or a manual fallback. ## Dedicated worker threads -`spawn_subagent` accepts an optional `dedicated_thread: true` flag. When set, the -sub-agent's run is persisted into a fresh **worker**-labeled thread the user can -open from the thread list, and you receive a compact reference (worker thread id -+ brief summary) instead of the full sub-agent transcript. Use this **only** -when the sub-task is genuinely long or complex and the parent thread should not -be flooded with the sub-agent's output — for example multi-step research, -multi-file refactors, or batch integration work that produces a large -transcript. For everyday delegation keep `dedicated_thread` off (the default) -and surface the result inline. - -Worker threads are one level deep by design: a sub-agent never sees -`spawn_subagent` or `spawn_worker_thread`, so a worker cannot itself spawn another worker. +Use `spawn_worker_thread` for genuinely long or complex delegated tasks where the full +sub-agent transcript would flood the parent thread — for example multi-step research, +multi-file refactors, or batch integration work. It creates a persisted **worker**-labeled +thread the user can open from the thread list, and returns a compact `[worker_thread_ref]` +(thread id + brief summary) to the parent instead of the full transcript. + +For routine delegation use the matching `delegate_*` tool and surface the result inline. + +Worker threads are one level deep by design: a sub-agent spawned via `spawn_worker_thread` +cannot itself call `spawn_worker_thread`, so workers never nest. ## Connecting external services diff --git a/src/openhuman/agent/agents/orchestrator/prompt.rs b/src/openhuman/agent/agents/orchestrator/prompt.rs index 2e043afec..3046e85a9 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.rs +++ b/src/openhuman/agent/agents/orchestrator/prompt.rs @@ -3,7 +3,8 @@ //! The orchestrator follows a direct-first policy: respond directly or use //! cheap direct tools whenever possible, and delegate only for specialised //! execution. It never executes Composio actions itself; the integration -//! block points to `spawn_subagent(integrations_agent, toolkit=…)` for true +//! block points to `delegate_{toolkit}` tools (synthesised by +//! `orchestrator_tools::collect_orchestrator_tools`) for true //! external-service operations. That prose lives here (not in the shared //! prompts module) so the skill-executor voice stays in //! `integrations_agent/prompt.rs` and nobody has to branch on `agent_id` @@ -13,6 +14,7 @@ use crate::openhuman::context::prompt::{ render_datetime, render_tools, render_user_files, render_workspace, ConnectedIntegration, PromptContext, }; +use crate::openhuman::tools::orchestrator_tools::sanitise_slug; use anyhow::Result; use std::fmt::Write; @@ -62,16 +64,28 @@ pub fn build(ctx: &PromptContext<'_>) -> Result { Ok(out) } -/// Render the delegator-voice `## Delegation Guide — Integrations` -/// block. Only toolkits the user has actively connected are listed — -/// unauthorised toolkits are hidden so the orchestrator can't hallucinate -/// a spawn against an integration the `spawn_subagent` pre-flight will -/// immediately reject. When every toolkit is unconnected, the whole -/// section is omitted. +/// Render the delegator-voice `## Connected Integrations` block. Only +/// toolkits the user has actively connected are listed — unauthorised +/// toolkits are hidden so the orchestrator cannot hallucinate a delegation +/// to an integration whose `delegate_*` tool does not actually exist. +/// When every toolkit is unconnected the whole section is omitted. +/// +/// The tool name printed in the prompt is derived with the same +/// `sanitise_slug` function that `collect_orchestrator_tools` uses when +/// synthesising the real tool objects, so the names in the prompt always +/// match the names in the function-calling schema. fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String { let connected: Vec<&ConnectedIntegration> = integrations.iter().filter(|ci| ci.connected).collect(); + tracing::debug!( + total_integrations = integrations.len(), + connected_count = connected.len(), + "[delegation-guide] rendering integration section ({} connected / {} total)", + connected.len(), + integrations.len() + ); if connected.is_empty() { + tracing::debug!("[delegation-guide] section omitted — no connected integrations"); return String::new(); } let mut out = String::from( @@ -79,12 +93,20 @@ fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String { Delegate tasks for these services using the matching `delegate_{toolkit}` tool:\n\n", ); for ci in connected { + // Use the same slug canonicalisation as `collect_orchestrator_tools` + // so the tool name in the prompt always matches the synthesised tool. + let slug = sanitise_slug(&ci.toolkit); let _ = writeln!( out, "- **{}** (delegate via `delegate_{}`): {}", - ci.toolkit, ci.toolkit, ci.description + ci.toolkit, slug, ci.description ); } + tracing::debug!( + section_len = out.len(), + "[delegation-guide] section emitted ({} bytes)", + out.len() + ); out } diff --git a/src/openhuman/tools/orchestrator_tools.rs b/src/openhuman/tools/orchestrator_tools.rs index 14e9240b5..77e162861 100644 --- a/src/openhuman/tools/orchestrator_tools.rs +++ b/src/openhuman/tools/orchestrator_tools.rs @@ -124,6 +124,17 @@ pub fn collect_orchestrator_tools( continue; } for integration in connected_integrations { + // Only emit a delegate_* tool for integrations that are + // actually connected — exposing unconnected entries would + // let the orchestrator call a tool whose pre-flight + // will immediately reject with "not connected". + if !integration.connected { + log::debug!( + "[orchestrator_tools] skipping unconnected integration: {}", + integration.toolkit + ); + continue; + } // Slug the toolkit name into a tool-name-safe form. // Composio toolkit slugs are already lowercase / dash- // separated (e.g. "gmail", "google_calendar"), but @@ -175,7 +186,11 @@ pub fn collect_orchestrator_tools( /// Allows ASCII alphanumerics and underscores; everything else becomes /// an underscore. OpenAI-style function names only accept /// `[a-zA-Z0-9_-]{1,64}`, so this is the conservative subset. -fn sanitise_slug(raw: &str) -> String { +/// +/// Used both when synthesising `delegate_*` tools and when rendering the +/// delegation guide in prompts — they must agree on slug canonicalisation +/// so the prompt always references a tool name that actually exists. +pub(crate) fn sanitise_slug(raw: &str) -> String { raw.chars() .map(|c| { if c.is_ascii_alphanumeric() || c == '_' { @@ -357,4 +372,38 @@ mod tests { assert_eq!(sanitise_slug("slack.bot"), "slack_bot"); assert_eq!(sanitise_slug("weird name!"), "weird_name_"); } + + /// Unconnected integrations must be silently skipped — exposing a + /// `delegate_*` tool for a toolkit whose OAuth token is absent would + /// let the orchestrator call a tool whose pre-flight check immediately + /// rejects with "not connected". + #[test] + fn unconnected_integrations_are_skipped() { + let orch = sample_orchestrator(); + let reg = registry_with_targets(); + let integrations = vec![ + integration("gmail", "Send and read email."), + ConnectedIntegration { + toolkit: "github".into(), + description: "GitHub access.".into(), + tools: vec![], + connected: false, // not connected — must not produce a tool + }, + integration("notion", "Read and write pages."), + ]; + let tools = collect_orchestrator_tools(&orch, ®, &integrations); + let names: Vec<&str> = tools.iter().map(|t| t.name()).collect(); + assert!( + names.contains(&"delegate_gmail"), + "connected gmail must produce a tool" + ); + assert!( + !names.contains(&"delegate_github"), + "unconnected github must NOT produce a tool" + ); + assert!( + names.contains(&"delegate_notion"), + "connected notion must produce a tool" + ); + } } From e1481e81ddb4ba26aaafcdcd9b2f192dbb718c23 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:49:59 +0530 Subject: [PATCH 5/9] fix(prompts): pass dispatcher_instructions through for Native tool format The ToolsSection early-return on ToolCallFormat::Native discarded any non-empty dispatcher_instructions. NativeToolDispatcher::prompt_instructions() returns a non-empty "## Tool Use Protocol" block that models need in order to understand they should emit native tool calls rather than narrate intent. New behaviour: if dispatcher_instructions is non-empty for Native mode, return it directly (the tool catalogue is still omitted to save tokens); if empty, return String::new() as before. Adds a test to pin the new contract. Addresses @coderabbitai comment on prompts/mod.rs:398-403. --- src/openhuman/agent/prompts/mod.rs | 12 ++++++-- src/openhuman/agent/prompts/mod_tests.rs | 36 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/openhuman/agent/prompts/mod.rs b/src/openhuman/agent/prompts/mod.rs index b589d79e8..50913defe 100644 --- a/src/openhuman/agent/prompts/mod.rs +++ b/src/openhuman/agent/prompts/mod.rs @@ -396,10 +396,16 @@ impl PromptSection for ToolsSection { fn build(&self, ctx: &PromptContext<'_>) -> Result { // Native function-calling: the provider already sends full JSON - // schemas in the API request — no need to repeat them in the - // system prompt. Skip the catalogue to save tokens. + // schemas in the API request — no need to repeat the tool catalogue + // in the system prompt (pure token bloat). However, any non-empty + // `dispatcher_instructions` (e.g. the "## Tool Use Protocol" block + // from NativeToolDispatcher) must still be included so the model + // receives its behavioural guidance. if ctx.tool_call_format == ToolCallFormat::Native { - return Ok(String::new()); + if ctx.dispatcher_instructions.trim().is_empty() { + return Ok(String::new()); + } + return Ok(ctx.dispatcher_instructions.to_string()); } let mut out = String::from("## Tools\n\n"); let has_filter = !ctx.visible_tool_names.is_empty(); diff --git a/src/openhuman/agent/prompts/mod_tests.rs b/src/openhuman/agent/prompts/mod_tests.rs index 8e7d85cb5..beb7b8dd4 100644 --- a/src/openhuman/agent/prompts/mod_tests.rs +++ b/src/openhuman/agent/prompts/mod_tests.rs @@ -1384,3 +1384,39 @@ fn tools_section_nonempty_for_pformat() { "PFormat should render tool catalogue header, got: {out:?}" ); } + +#[test] +fn tools_section_native_with_dispatcher_instructions_returns_instructions() { + // Native mode must still include non-empty dispatcher_instructions + // (e.g. the "## Tool Use Protocol" block from NativeToolDispatcher) so + // the model receives behavioural guidance even though the tool catalogue + // itself is omitted. + let tools: Vec> = vec![Box::new(TestTool)]; + let prompt_tools = PromptTool::from_tools(&tools); + let ctx = PromptContext { + workspace_dir: Path::new("/tmp"), + model_name: "test-model", + agent_id: "", + tools: &prompt_tools, + skills: &[], + dispatcher_instructions: "## Tool Use Protocol\n\nUse native tool calling.", + learned: LearnedContextData::default(), + visible_tool_names: &NO_FILTER, + tool_call_format: ToolCallFormat::Native, + connected_integrations: &[], + connected_identities_md: String::new(), + include_profile: false, + include_memory_md: false, + curated_snapshot: None, + user_identity: None, + }; + let out = ToolsSection.build(&ctx).unwrap(); + assert!( + out.contains("## Tool Use Protocol"), + "Native mode with non-empty dispatcher_instructions must include them, got: {out:?}" + ); + assert!( + !out.contains("## Tools"), + "Native mode must not include the tool catalogue header, got: {out:?}" + ); +} From 594463ec1bc2a545a95a1748d2286f02aa9ea4c7 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:50:14 +0530 Subject: [PATCH 6/9] fix(dispatch): add 3s timeout to fetch_connected_integrations in routing path build_connection_state_block already wraps its fetch in a 3-second timeout to prevent Composio API latency from blocking turn processing, but the fetch at the delegation-tools expansion site (line ~442) had no such protection. Slow or unresponsive Composio API could block agent routing indefinitely. Wrap with the same 3s tokio::time::timeout and fall back to Vec::new() on timeout, matching the pattern used elsewhere in this file. Addresses @coderabbitai comment on dispatch.rs:441-449. --- src/openhuman/channels/runtime/dispatch.rs | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs index 70c9585d6..804fcc5f0 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch.rs @@ -438,8 +438,29 @@ async fn resolve_target_agent(channel: &str) -> AgentScoping { // the helper is agent-agnostic so future agents that delegate // (e.g. a custom workspace-override planner that subdivides work) // pick this up for free. + // + // Wrap the Composio fetch in the same 3-second timeout used by + // `build_connection_state_block` so a slow/unresponsive Composio API + // can never block turn dispatch indefinitely. + const COMPOSIO_FETCH_TIMEOUT_SECS: u64 = 3; let extra_tools = if !definition.subagents.is_empty() { - let connected = fetch_connected_integrations(&config).await; + let connected = match tokio::time::timeout( + Duration::from_secs(COMPOSIO_FETCH_TIMEOUT_SECS), + fetch_connected_integrations(&config), + ) + .await + { + Ok(list) => list, + Err(_) => { + tracing::warn!( + channel = %channel, + target_agent = target_id, + "[dispatch::routing] Composio fetch timed out after {}s — proceeding without connected integrations", + COMPOSIO_FETCH_TIMEOUT_SECS + ); + Vec::new() + } + }; tracing::debug!( channel = %channel, target_agent = target_id, From 97e5c9120e4c75ac4d28a452833714d6f06215d4 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 11:51:48 +0530 Subject: [PATCH 7/9] test(subagent_runner,orchestrator_tools): add char-safe and unconnected-integration tests - ops_tests.rs: add max_result_chars_cap_is_char_safe_for_multibyte to prove truncation at a multi-byte codepoint boundary does not panic; update existing tests to use the char-safe logic matching ops.rs. - orchestrator_tools.rs: add unconnected_integrations_are_skipped to pin the new connected-only filtering contract. Addresses @coderabbitai nitpick on ops_tests.rs:641-668. --- .../harness/subagent_runner/ops_tests.rs | 52 +++++++++++++++---- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs index c5dfce69c..e6dc740f4 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs @@ -640,27 +640,61 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() { #[test] fn max_result_chars_cap_is_enforced() { - // Verify that `max_result_chars` truncation logic is correct. - // We test the truncation formula directly since running the full - // subagent loop requires a provider. + // Verify that max_result_chars truncation uses char count (not bytes) + // and produces a truncated result ending with "[...truncated]". let cap = 10usize; - let mut output = "hello world this is long".to_string(); - if output.len() > cap { - output.truncate(cap); + let input = "hello world this is long".to_string(); + // Simulate the logic in ops.rs (char-safe path). + let original_chars = input.chars().count(); + let mut output = input.clone(); + if original_chars > cap { + let byte_offset = output + .char_indices() + .nth(cap) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); output.push_str("\n[...truncated]"); } - assert!(output.starts_with("hello worl")); + assert_eq!(&output[..10], "hello worl"); assert!(output.ends_with("[...truncated]")); } +#[test] +fn max_result_chars_cap_is_char_safe_for_multibyte() { + // A cap landing in the middle of a multi-byte UTF-8 sequence must + // not panic. "café" has 4 chars but the 'é' is 2 bytes — truncating + // at byte 4 with the old String::truncate() would panic. + let cap = 3usize; // keep "caf", drop "é" + let input = "café latte".to_string(); + let original_chars = input.chars().count(); + let mut output = input.clone(); + if original_chars > cap { + let byte_offset = output + .char_indices() + .nth(cap) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); + output.push_str("\n[...truncated]"); + } + assert_eq!(output, "caf\n[...truncated]"); +} + #[test] fn max_result_chars_not_applied_when_none() { let cap: Option = None; let original = "short output".to_string(); let mut output = original.clone(); if let Some(c) = cap { - if output.len() > c { - output.truncate(c); + let char_len = output.chars().count(); + if char_len > c { + let byte_offset = output + .char_indices() + .nth(c) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); output.push_str("\n[...truncated]"); } } From da54bb7bf8fc54e0f4c4ef0659f7c238b13ec09d Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 12:06:50 +0530 Subject: [PATCH 8/9] fix(test): update orchestrator_lists_memory_tree_tools for consolidated tool #1141 consolidated 6 individual memory_tree_* tools into a single memory_tree tool with mode dispatch. The integration test in tests/agent_retrieval_e2e.rs still asserted all 6 old names existed in the orchestrator TOML, causing CI to fail. Update the test to: - Assert memory_tree IS present (the new consolidated tool) - Assert none of the old individual names are present (pinning removal) --- tests/agent_retrieval_e2e.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/agent_retrieval_e2e.rs b/tests/agent_retrieval_e2e.rs index edebef5ad..3bd7e2935 100644 --- a/tests/agent_retrieval_e2e.rs +++ b/tests/agent_retrieval_e2e.rs @@ -80,13 +80,24 @@ fn alice_phoenix_thread() -> EmailThread { } } -/// The orchestrator definition must list every memory-tree tool name so -/// the bus filter actually exposes them to the LLM. A wired-up wrapper -/// that's invisible to the orchestrator is dead code. +/// The orchestrator definition must list the consolidated `memory_tree` tool +/// so the bus filter exposes it to the LLM. A wired-up wrapper that's +/// invisible to the orchestrator is dead code. +/// +/// NOTE: #1141 consolidated the 6 individual `memory_tree_*` tools +/// (`memory_tree_search_entities`, `memory_tree_query_topic`, etc.) into a +/// single `memory_tree` tool with a `mode` dispatch parameter. The orchestrator +/// TOML was updated accordingly. #[test] fn orchestrator_lists_memory_tree_tools() { let toml = include_str!("../src/openhuman/agent/agents/orchestrator/agent.toml"); - for name in [ + assert!( + toml.contains("memory_tree"), + "orchestrator agent.toml must list 'memory_tree' so the LLM can call it" + ); + // Verify the old individual tool names are gone — they were removed in #1141 + // when all 6 were consolidated into the single `memory_tree` dispatcher. + for old_name in [ "memory_tree_search_entities", "memory_tree_query_topic", "memory_tree_query_source", @@ -95,8 +106,8 @@ fn orchestrator_lists_memory_tree_tools() { "memory_tree_fetch_leaves", ] { assert!( - toml.contains(name), - "orchestrator agent.toml must list '{name}' so the LLM can call it" + !toml.contains(old_name), + "orchestrator agent.toml must NOT list '{old_name}' — removed in #1141 (use 'memory_tree' with mode= dispatch)" ); } } From f9943f721d4585380c92e232df24ae9d8a096df8 Mon Sep 17 00:00:00 2001 From: M3gA-Mind Date: Thu, 7 May 2026 12:22:26 +0530 Subject: [PATCH 9/9] fix(review): address CodeRabbit feedback on PR #1314 - orchestrator/prompt.md: replace `integrations_agent` references in the decision tree with the actual synthesised `delegate_{toolkit}` / `delegate_run_code` / `delegate_plan` etc. tool names so the prompt matches what the LLM sees in its function-calling schema - tests/agent_retrieval_e2e.rs: strengthen memory_tree tool assertion to use exact line matching (`"memory_tree"` / `"memory_tree",`) rather than a substring match that could hit comments or prefixed names - subagent_runner/ops_truncation_tests.rs: extract the three max_result_chars truncation tests into their own file, keeping ops_tests.rs under the 500-line guideline threshold --- .../agent/agents/orchestrator/prompt.md | 11 +-- .../agent/harness/subagent_runner/ops.rs | 4 ++ .../harness/subagent_runner/ops_tests.rs | 64 +----------------- .../subagent_runner/ops_truncation_tests.rs | 67 +++++++++++++++++++ tests/agent_retrieval_e2e.rs | 17 ++++- 5 files changed, 93 insertions(+), 70 deletions(-) create mode 100644 src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md index 4cd202222..0122fbee0 100644 --- a/src/openhuman/agent/agents/orchestrator/prompt.md +++ b/src/openhuman/agent/agents/orchestrator/prompt.md @@ -21,11 +21,12 @@ Follow this sequence for every user message: - Yes: use direct tools first (`current_time`, `cron_*`, `memory_*`, `composio_list_connections`, etc.). - No: continue. 3. **Does this need specialised execution?** - - If external SaaS integration work is required, delegate to `integrations_agent` with the right toolkit. - - If code writing/execution/debugging is required, delegate to `code_executor`. - - If web/doc crawling is required, delegate to `researcher`. - - If complex multi-step decomposition is required, delegate to `planner` (and only then route deeper if necessary). - - If code review is requested, delegate to `critic`. + - If external SaaS integration work is required, use `delegate_{toolkit}` (e.g. `delegate_gmail`, `delegate_notion`). + - If code writing/execution/debugging is required, use `delegate_run_code`. + - If web/doc crawling is required, use `delegate_researcher`. + - If complex multi-step decomposition is required, use `delegate_plan`. + - If code review is requested, use `delegate_critic`. + - If memory archiving or distillation is required, use `delegate_archivist`. 4. **After delegation**, summarise results clearly and concisely. Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**. diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index b8ec65303..408fb756a 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -1344,3 +1344,7 @@ fn parse_tool_arguments(arguments: &str) -> serde_json::Value { #[cfg(test)] #[path = "ops_tests.rs"] mod tests; + +#[cfg(test)] +#[path = "ops_truncation_tests.rs"] +mod truncation_tests; diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs index e6dc740f4..d2829b499 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs @@ -638,65 +638,5 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() { assert_eq!(outcome.iterations, 1); } -#[test] -fn max_result_chars_cap_is_enforced() { - // Verify that max_result_chars truncation uses char count (not bytes) - // and produces a truncated result ending with "[...truncated]". - let cap = 10usize; - let input = "hello world this is long".to_string(); - // Simulate the logic in ops.rs (char-safe path). - let original_chars = input.chars().count(); - let mut output = input.clone(); - if original_chars > cap { - let byte_offset = output - .char_indices() - .nth(cap) - .map(|(i, _)| i) - .unwrap_or(output.len()); - output.truncate(byte_offset); - output.push_str("\n[...truncated]"); - } - assert_eq!(&output[..10], "hello worl"); - assert!(output.ends_with("[...truncated]")); -} - -#[test] -fn max_result_chars_cap_is_char_safe_for_multibyte() { - // A cap landing in the middle of a multi-byte UTF-8 sequence must - // not panic. "café" has 4 chars but the 'é' is 2 bytes — truncating - // at byte 4 with the old String::truncate() would panic. - let cap = 3usize; // keep "caf", drop "é" - let input = "café latte".to_string(); - let original_chars = input.chars().count(); - let mut output = input.clone(); - if original_chars > cap { - let byte_offset = output - .char_indices() - .nth(cap) - .map(|(i, _)| i) - .unwrap_or(output.len()); - output.truncate(byte_offset); - output.push_str("\n[...truncated]"); - } - assert_eq!(output, "caf\n[...truncated]"); -} - -#[test] -fn max_result_chars_not_applied_when_none() { - let cap: Option = None; - let original = "short output".to_string(); - let mut output = original.clone(); - if let Some(c) = cap { - let char_len = output.chars().count(); - if char_len > c { - let byte_offset = output - .char_indices() - .nth(c) - .map(|(i, _)| i) - .unwrap_or(output.len()); - output.truncate(byte_offset); - output.push_str("\n[...truncated]"); - } - } - assert_eq!(output, original); -} +// Truncation tests live in ops_truncation_tests.rs to keep this file +// under the ~500-line guideline. diff --git a/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs new file mode 100644 index 000000000..e8141c07c --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs @@ -0,0 +1,67 @@ +/// Tests for the `max_result_chars` truncation logic in `ops.rs`. +/// +/// Kept in a dedicated file so `ops_tests.rs` stays under ~500 lines. +/// The logic under test lives in `run_subagent` — tests here cover the +/// char-safe truncation path directly without spinning up a provider. + +#[test] +fn max_result_chars_cap_is_enforced() { + // Verify that max_result_chars truncation uses char count (not bytes) + // and produces a truncated result ending with "[...truncated]". + let cap = 10usize; + let input = "hello world this is long".to_string(); + let original_chars = input.chars().count(); + let mut output = input.clone(); + if original_chars > cap { + let byte_offset = output + .char_indices() + .nth(cap) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); + output.push_str("\n[...truncated]"); + } + assert_eq!(&output[..10], "hello worl"); + assert!(output.ends_with("[...truncated]")); +} + +#[test] +fn max_result_chars_cap_is_char_safe_for_multibyte() { + // A cap landing in the middle of a multi-byte UTF-8 sequence must + // not panic. "café" has 4 chars but 'é' is 2 bytes — truncating at + // byte offset 4 with a raw String::truncate() would panic. + let cap = 3usize; // keep "caf", drop "é" + let input = "café latte".to_string(); + let original_chars = input.chars().count(); + let mut output = input.clone(); + if original_chars > cap { + let byte_offset = output + .char_indices() + .nth(cap) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); + output.push_str("\n[...truncated]"); + } + assert_eq!(output, "caf\n[...truncated]"); +} + +#[test] +fn max_result_chars_not_applied_when_none() { + let cap: Option = None; + let original = "short output".to_string(); + let mut output = original.clone(); + if let Some(c) = cap { + let char_len = output.chars().count(); + if char_len > c { + let byte_offset = output + .char_indices() + .nth(c) + .map(|(i, _)| i) + .unwrap_or(output.len()); + output.truncate(byte_offset); + output.push_str("\n[...truncated]"); + } + } + assert_eq!(output, original); +} diff --git a/tests/agent_retrieval_e2e.rs b/tests/agent_retrieval_e2e.rs index 3bd7e2935..3ea560ece 100644 --- a/tests/agent_retrieval_e2e.rs +++ b/tests/agent_retrieval_e2e.rs @@ -91,9 +91,14 @@ fn alice_phoenix_thread() -> EmailThread { #[test] fn orchestrator_lists_memory_tree_tools() { let toml = include_str!("../src/openhuman/agent/agents/orchestrator/agent.toml"); + // Exact entry match — substring match would also hit comments or prefixed names. + let has_memory_tree_entry = toml + .lines() + .map(str::trim) + .any(|line| line == "\"memory_tree\"" || line == "\"memory_tree\","); assert!( - toml.contains("memory_tree"), - "orchestrator agent.toml must list 'memory_tree' so the LLM can call it" + has_memory_tree_entry, + "orchestrator agent.toml must list 'memory_tree' as a named tool entry" ); // Verify the old individual tool names are gone — they were removed in #1141 // when all 6 were consolidated into the single `memory_tree` dispatcher. @@ -105,8 +110,14 @@ fn orchestrator_lists_memory_tree_tools() { "memory_tree_drill_down", "memory_tree_fetch_leaves", ] { + let entry = format!("\"{old_name}\""); + let entry_comma = format!("\"{old_name}\","); + let old_tool_present = toml + .lines() + .map(str::trim) + .any(|line| line == entry || line == entry_comma); assert!( - !toml.contains(old_name), + !old_tool_present, "orchestrator agent.toml must NOT list '{old_name}' — removed in #1141 (use 'memory_tree' with mode= dispatch)" ); }