From a1ac5b3334c153cb330d15aed61fc1a67fb7d73a Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 03:40:58 +0530
Subject: [PATCH 1/9] perf(agent): orchestrator harness efficiency improvements
 (#1141)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Re-enable payload summarizer at 4,000-token threshold (was disabled at
  0 after recursive-dispatch was observed; root cause was the missing
  `omit_skills_catalog = true` guard on the summarizer archetype)
- Skip `render_tools()` catalogue for native function-calling sessions
  (provider already sends JSON schemas in the API payload — duplicate in
  the system prompt was pure token bloat)
- Consolidate 6 separate `memory_tree_*` tools into one `memory_tree`
  tool dispatched by `mode` enum, shrinking the orchestrator's tool
  surface by 5 entries per API call
- Remove `schedule` and `spawn_subagent` from orchestrator tool list;
  `cron_add/cron_list/cron_remove` are the canonical scheduling surface
  and `delegate_*` + `spawn_worker_thread` replace `spawn_subagent`
- Remove redundant "Available Sub-Agents" and "Direct Tools" tables from
  orchestrator prompt (LLM already sees these as native API tool schemas)
- Compact `render_delegation_guide` to a single line per toolkit instead
  of a verbose multi-line spawn_subagent snippet
- Add `max_result_chars` to `AgentDefinition`; set 8k for planner and
  researcher, 16k for code_executor to prevent context flooding
- Wrap `ParentExecutionContext::memory_context` in `Arc<Option<String>>`
  so sub-agent spawns share the same allocation instead of cloning
- Add `docs/DELEGATION_POLICY.md` codifying the 4-tier direct-first
  decision tree
- Unit tests for all new behavior (ToolsSection native skip,
  memory_tree dispatcher modes, max_result_chars truncation,
  delegation guide compact format)

Closes #1141
---
 docs/DELEGATION_POLICY.md                     |  31 +++
 .../agent/agents/code_executor/agent.toml     |   1 +
 .../agent/agents/orchestrator/agent.toml      |  16 +-
 .../agent/agents/orchestrator/prompt.md       |  63 ++----
 .../agent/agents/orchestrator/prompt.rs       |  42 ++--
 src/openhuman/agent/agents/planner/agent.toml |   1 +
 .../agent/agents/researcher/agent.toml        |   1 +
 src/openhuman/agent/harness/definition.rs     |   8 +
 .../agent/harness/definition_tests.rs         |   1 +
 src/openhuman/agent/harness/fork_context.rs   |   4 +-
 .../agent/harness/payload_summarizer.rs       |   1 +
 src/openhuman/agent/harness/session/turn.rs   |   2 +-
 .../agent/harness/subagent_runner/ops.rs      |  18 +-
 .../harness/subagent_runner/ops_tests.rs      |  36 +++-
 src/openhuman/agent/prompts/mod.rs            |   6 +
 src/openhuman/agent/prompts/mod_tests.rs      |  79 ++++++-
 src/openhuman/agent/triage/escalation.rs      |   2 +-
 src/openhuman/channels/runtime/dispatch.rs    |   1 +
 src/openhuman/config/schema/context.rs        |  16 +-
 .../tools/impl/agent/spawn_worker_thread.rs   |   2 +-
 src/openhuman/tools/impl/memory/tree/mod.rs   | 192 ++++++++++++++++--
 src/openhuman/tools/ops.rs                    |   7 +-
 src/openhuman/tools/orchestrator_tools.rs     |   1 +
 tests/agent_harness_public.rs                 |   2 +-
 tests/calendar_grounding_e2e.rs               |   2 +-
 25 files changed, 407 insertions(+), 128 deletions(-)
 create mode 100644 docs/DELEGATION_POLICY.md
diff --git a/docs/DELEGATION_POLICY.md b/docs/DELEGATION_POLICY.md
new file mode 100644
index 000000000..7b163a60b
--- /dev/null
+++ b/docs/DELEGATION_POLICY.md
@@ -0,0 +1,31 @@
+# Delegation Policy
+
+## When to delegate vs. act directly
+
+The orchestrator follows a direct-first policy. This document codifies the four-tier decision tree the orchestrator applies to every user message.
+
+## Tier 1 — Reply directly (no tools)
+Apply when: small talk, simple factual Q&A, acknowledgements, clarification requests, context already in the system prompt.
+Cost: 0 tokens (output only).
+Rule: if you can answer without calling any tool, do so.
+
+## Tier 2 — Use a direct tool
+Apply when: the task needs a tool but not specialised execution (time lookup, memory read/write, cron scheduling, workspace state, listing connections).
+Cost: 1 tool call + parse overhead (~200-400 tokens).
+Rule: prefer `current_time`, `cron_*`, `memory_*`, `memory_tree`, `read_workspace_state`, `composio_list_connections`, `ask_user_clarification`.
+
+## Tier 3 — Spawn a sub-agent (inline)
+Apply when: the task requires specialised execution (writing code, crawling docs, running shell, calling an external integration) that the orchestrator cannot do directly.
+Cost: full sub-agent turn (~1-5k tokens depending on archetype).
+Rule: spawn the narrowest archetype that can complete the task. Prefer inline spawn (`spawn_worker_thread` with no dedicated thread) for tasks that complete in <5 turns.
+
+## Tier 4 — Spawn a dedicated worker thread
+Apply when: the task is long (>5 turns estimated), produces a large transcript, or the user explicitly wants it tracked as a separate thread.
+Cost: same as Tier 3 but the parent thread is not flooded.
+Rule: use `spawn_worker_thread` and surface a brief summary back to the parent. Do not chain workers (workers cannot spawn workers).
+
+## Anti-patterns to avoid
+- Spawning a sub-agent to answer a question the orchestrator already has context for.
+- Delegating a tool call to a sub-agent when `current_tier <= 2` applies.
+- Using `spawn_subagent` when `delegate_{archetype}` covers the task — `delegate_*` tools carry the full archetype definition and have correct tool filtering pre-configured.
+- Passing the entire parent conversation as context to a sub-agent — pass only the task-relevant slice.
diff --git a/src/openhuman/agent/agents/code_executor/agent.toml b/src/openhuman/agent/agents/code_executor/agent.toml
index 3ee1c5d6f..1482d31c1 100644
--- a/src/openhuman/agent/agents/code_executor/agent.toml
+++ b/src/openhuman/agent/agents/code_executor/agent.toml
@@ -4,6 +4,7 @@ delegate_name = "run_code"
 when_to_use = "Sandboxed developer — writes, runs, and debugs code until tests pass. Use for any task that requires producing or modifying source files and exercising them with shell or test commands."
 temperature = 0.4
 max_iterations = 10
+max_result_chars = 16000
 sandbox_mode = "sandboxed"
 omit_identity = true
 omit_memory_context = true
diff --git a/src/openhuman/agent/agents/orchestrator/agent.toml b/src/openhuman/agent/agents/orchestrator/agent.toml
index f8946e008..e540f0904 100644
--- a/src/openhuman/agent/agents/orchestrator/agent.toml
+++ b/src/openhuman/agent/agents/orchestrator/agent.toml
@@ -59,9 +59,7 @@ hint = "reasoning"
 
 [tools]
 # Direct tools — things the orchestrator calls itself rather than
-# delegating. `spawn_subagent` is retained as an advanced fallback so
-# power users can still spawn arbitrary agent ids that are not listed
-# in `subagents` above (e.g. workspace-override custom agents).
+# delegating.
 #
 # `composio_list_connections` is the orchestrator's only composio_*
 # tool: it exists so the agent can detect newly-authorised integrations
@@ -74,28 +72,20 @@ named = [
     "query_memory",
     "memory_store",
     "memory_forget",
-    "memory_tree_search_entities",
-    "memory_tree_query_topic",
-    "memory_tree_query_source",
-    "memory_tree_query_global",
-    "memory_tree_drill_down",
-    "memory_tree_fetch_leaves",
+    "memory_tree",
     "read_workspace_state",
     "ask_user_clarification",
-    "spawn_subagent",
     "spawn_worker_thread",
     "composio_list_connections",
     # Time + scheduling — lets the orchestrator answer "what time is it",
     # "remind me in 10 minutes", "every morning at 8" directly rather than
     # delegating or telling the user it can't. `current_time` grounds
     # relative-time parsing; `cron_add` / `cron_list` / `cron_remove`
-    # manage recurring + one-shot agent/shell jobs; `schedule` is the
-    # simpler shell-only alias for one-shot reminders.
+    # manage recurring + one-shot agent/shell jobs.
     "current_time",
     "cron_add",
     "cron_list",
     "cron_remove",
-    "schedule",
     # Coding-harness coordination primitives from #1208. `todowrite`
     # gives the orchestrator a shared todo store to track multi-step
     # work across delegations; `plan_exit` is the stable marker that
diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md
index af81f4f83..23c04c66e 100644
--- a/src/openhuman/agent/agents/orchestrator/prompt.md
+++ b/src/openhuman/agent/agents/orchestrator/prompt.md
@@ -30,43 +30,7 @@ Follow this sequence for every user message:
 
 Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**.
 
-## Available Sub-Agents
-
-| Archetype         | When to Use                                                                |
-| ----------------- | -------------------------------------------------------------------------- |
-| **Planner**       | Complex tasks that need a multi-step plan before execution.                |
-| **Code Executor** | Writing, modifying, or running code. Runs sandboxed.                       |
-| **Skills Agent**  | Interacting with connected services (Notion, Gmail, etc.) via skill tools. |
-| **Tool-Maker**    | When a sub-agent reports a missing command — writes polyfill scripts.      |
-| **Researcher**    | Finding information in docs, web, or files. Compresses to dense markdown.  |
-| **Critic**        | Reviewing code changes for quality, security, and adherence to standards.  |
-
-## Direct Tools (call these yourself — no delegation needed)
-
-Some capabilities are cheap, read-only, or purely declarative — delegating them
-to a sub-agent wastes a turn. Use these directly:
-
-| Tool                        | When to use                                                                                               |
-| --------------------------- | --------------------------------------------------------------------------------------------------------- |
-| `current_time`              | Any time the user refers to "now", "in 10 minutes", "tomorrow", "tonight", or before scheduling anything. |
-| `cron_add` / `cron_list` / `cron_remove` | Reminders, recurring tasks, follow-ups. Use `job_type: "agent"` with a `prompt` to have a future agent run fire (e.g. send a pushover reminder). Use cron expressions for recurring, `at` for one-shot absolute times, `every` for fixed intervals. |
-| `schedule`                  | Lightweight alias for one-shot shell reminders. Prefer `cron_add` with `job_type: "agent"` for anything that should produce a user-visible message. |
-| `query_memory`              | Pull long-term user context (preferences, past conversations, saved notes) before answering personal questions. |
-| `memory_store` / `memory_forget` | Persist a fact the user asked you to remember, or drop one they asked you to forget. |
-| `read_workspace_state`      | Get git status + file tree before planning a code task.                                                   |
-| `composio_list_connections` | Check which external integrations (Gmail, Notion, GitHub, …) the user has authorised *right now*. Session-start list may be stale. |
-| `ask_user_clarification`    | Ask one focused question when the request is ambiguous — don't guess.                                     |
-| `spawn_subagent`            | Inline delegation: the sub-agent's work is collapsed into a single result in this thread. Use for quick tasks. |
-| `spawn_worker_thread`       | Dedicated delegation: creates a fresh 'worker' thread for the sub-agent. Use for long, complex, or multi-step tasks to avoid cluttering the parent thread. |
-
-**Scheduling rule of thumb.** To "remind me in 10 minutes", call `current_time`
-first. If `cron_add` is available and enabled for this runtime, then call
-`cron_add` with `schedule = {kind:"at", at:"<iso-time>"}`, `job_type:"agent"`,
-and a `prompt` that tells a future agent what to deliver (e.g. "Send pushover:
-'stand up and stretch'"). If `cron_add` is disabled by config, absent from your
-tool list, or returns an error, do not promise the reminder: tell the user you
-can't schedule it in this environment and, if helpful, provide the computed time
-or a manual fallback.
+When delegating: use `delegate_researcher` for web/doc lookups, `delegate_run_code` for coding, `delegate_plan` for complex decomposition, `delegate_critic` for reviews, `delegate_archivist` for memory writes, `delegate_{toolkit}` for external integrations. Use `spawn_worker_thread` for long tasks that need their own thread.
 
 ## Rules
 
@@ -77,6 +41,15 @@ or a manual fallback.
 - **Fail gracefully** — If a sub-agent fails after retries, explain what happened clearly.
 - **Escalate when appropriate** — If orchestration is the wrong mode or a specialist cannot make progress, hand control back to OpenHuman Core with a concise explanation and let Core handle general interactions.
 
+**Scheduling rule of thumb.** To "remind me in 10 minutes", call `current_time`
+first. If `cron_add` is available and enabled for this runtime, then call
+`cron_add` with `schedule = {kind:"at", at:"<iso-time>"}`, `job_type:"agent"`,
+and a `prompt` that tells a future agent what to deliver (e.g. "Send pushover:
+'stand up and stretch'"). If `cron_add` is disabled by config, absent from your
+tool list, or returns an error, do not promise the reminder: tell the user you
+can't schedule it in this environment and, if helpful, provide the computed time
+or a manual fallback.
+
 ## Dedicated worker threads
 
 `spawn_subagent` accepts an optional `dedicated_thread: true` flag. When set, the
@@ -146,16 +119,16 @@ User: what time is it?
 
 ## Memory tree retrieval
 
-Six tools query the user's ingested email/chat/document memory:
+Use `memory_tree` with a `mode` argument to query the user's ingested email/chat/document history:
 
-- `memory_tree_search_entities(query)` — resolve a name to a canonical id (e.g. "alice" → `email:alice@example.com`). ALWAYS call this first when the user mentions someone by name.
-- `memory_tree_query_topic(entity_id, query?)` — all mentions of an entity, cross-source. Pass `query` for semantic rerank.
-- `memory_tree_query_source(source_kind?, time_window_days?, query?)` — filter by source type (chat/email/document) and time window. Use for "in my email last week…" intents.
-- `memory_tree_query_global(window_days)` — cross-source daily digest (the 7-day digest is pre-loaded into context on session start and refreshed every ~30 min, so only call this for a different window or to refresh on demand).
-- `memory_tree_drill_down(node_id)` — when a summary is too coarse, expand it one level.
-- `memory_tree_fetch_leaves(chunk_ids)` — pull raw chunks for citation.
+- `mode: "search_entities"` — resolve a name to a canonical id (e.g. "alice" → `email:alice@example.com`). ALWAYS call this first when the user mentions someone by name.
+- `mode: "query_topic"` — all cross-source mentions of an `entity_id` from `search_entities`.
+- `mode: "query_source"` — filter by `source_kind` (chat/email/document) and `time_window_days`. Use for "in my email last week…" intents.
+- `mode: "query_global"` — cross-source daily digest over `time_window_days` (7-day digest is pre-loaded into context on session start — only call for a different window or to force refresh).
+- `mode: "drill_down"` — expand a coarse `node_id` summary one level.
+- `mode: "fetch_leaves"` — pull raw `chunk_ids` for citation.
 
-Top-down expansion is the cost-control story: start with cheap summaries (`query_*`), only call `drill_down` / `fetch_leaves` when the user wants details or you need a quote.
+Start cheap (query_* summaries), only drill_down/fetch_leaves when you need verbatim content.
 
 ## Citations
 
diff --git a/src/openhuman/agent/agents/orchestrator/prompt.rs b/src/openhuman/agent/agents/orchestrator/prompt.rs
index 9f1b3003f..2e043afec 100644
--- a/src/openhuman/agent/agents/orchestrator/prompt.rs
+++ b/src/openhuman/agent/agents/orchestrator/prompt.rs
@@ -75,21 +75,14 @@ fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String {
         return String::new();
     }
     let mut out = String::from(
-        "## Delegation Guide — Integrations\n\n\
-         For any task that touches one of these external services, \
-         delegate to `integrations_agent` with the matching `toolkit` \
-         argument. The sub-agent receives the full action catalogue \
-         for that integration as native tool schemas — do not attempt \
-         to call integration actions directly from this agent.\n\n\
-         Only the integrations listed below are currently authorised. \
-         If the user asks about another service, tell them to connect \
-         it in **Skills** page before retrying.\n\n",
+        "## Connected Integrations\n\n\
+         Delegate tasks for these services using the matching `delegate_{toolkit}` tool:\n\n",
     );
     for ci in connected {
         let _ = writeln!(
             out,
-            "- **{}** — {}\n  Delegate with: `spawn_subagent(agent_id=\"integrations_agent\", toolkit=\"{}\", prompt=<task>)`",
-            ci.toolkit, ci.description, ci.toolkit,
+            "- **{}** (delegate via `delegate_{}`): {}",
+            ci.toolkit, ci.toolkit, ci.description
         );
     }
     out
@@ -127,7 +120,7 @@ mod tests {
     fn build_returns_nonempty_body() {
         let body = build(&ctx_with(&[])).unwrap();
         assert!(!body.is_empty());
-        assert!(!body.contains("## Delegation Guide"));
+        assert!(!body.contains("## Connected Integrations"));
     }
 
     #[test]
@@ -154,14 +147,29 @@ mod tests {
             connected: true,
         }];
         let body = build(&ctx_with(&integrations)).unwrap();
-        assert!(body.contains("## Delegation Guide — Integrations"));
-        assert!(body.contains(
-            "spawn_subagent(agent_id=\"integrations_agent\", toolkit=\"gmail\", prompt=<task>)"
-        ));
+        assert!(body.contains("## Connected Integrations"));
+        assert!(body.contains("delegate_gmail"));
+        // Must NOT contain the old verbose spawn_subagent snippet.
+        assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\""));
         // Delegator voice must NOT use the skill-executor wording.
         assert!(!body.contains("You have direct access"));
     }
 
+    #[test]
+    fn delegation_guide_uses_compact_delegate_format() {
+        let integrations = vec![ConnectedIntegration {
+            toolkit: "gmail".into(),
+            description: "Email access.".into(),
+            tools: Vec::new(),
+            connected: true,
+        }];
+        let body = build(&ctx_with(&integrations)).unwrap();
+        assert!(body.contains("## Connected Integrations"));
+        assert!(body.contains("delegate_gmail"));
+        // Must NOT contain the old verbose spawn_subagent snippet.
+        assert!(!body.contains("spawn_subagent(agent_id=\"integrations_agent\""));
+    }
+
     #[test]
     fn build_hides_unconnected_integrations() {
         // Only connected toolkits make it into the Delegation Guide
@@ -196,6 +204,6 @@ mod tests {
             connected: false,
         }];
         let body = build(&ctx_with(&integrations)).unwrap();
-        assert!(!body.contains("## Delegation Guide"));
+        assert!(!body.contains("## Connected Integrations"));
     }
 }
diff --git a/src/openhuman/agent/agents/planner/agent.toml b/src/openhuman/agent/agents/planner/agent.toml
index 08f9bd895..722948da2 100644
--- a/src/openhuman/agent/agents/planner/agent.toml
+++ b/src/openhuman/agent/agents/planner/agent.toml
@@ -4,6 +4,7 @@ delegate_name = "plan"
 when_to_use = "Architect — break a complex task into a small DAG of subtasks with explicit acceptance criteria. Reads memory and searches the web to ground plans in real context. Read-only; produces JSON, not code."
 temperature = 0.4
 max_iterations = 8
+max_result_chars = 8000
 sandbox_mode = "read_only"
 omit_identity = true
 omit_memory_context = false
diff --git a/src/openhuman/agent/agents/researcher/agent.toml b/src/openhuman/agent/agents/researcher/agent.toml
index 191e9ecc4..b496f2809 100644
--- a/src/openhuman/agent/agents/researcher/agent.toml
+++ b/src/openhuman/agent/agents/researcher/agent.toml
@@ -4,6 +4,7 @@ delegate_name = "research"
 when_to_use = "Web & docs crawler — reads real documentation, compresses to dense markdown. Use for any task that requires looking up external knowledge."
 temperature = 0.4
 max_iterations = 8
+max_result_chars = 8000
 sandbox_mode = "none"
 omit_identity = true
 omit_memory_context = true
diff --git a/src/openhuman/agent/harness/definition.rs b/src/openhuman/agent/harness/definition.rs
index 864ef757b..d71f6fc81 100644
--- a/src/openhuman/agent/harness/definition.rs
+++ b/src/openhuman/agent/harness/definition.rs
@@ -127,6 +127,14 @@ pub struct AgentDefinition {
     #[serde(default = "defaults::max_iterations")]
     pub max_iterations: usize,
 
+    /// Maximum character length for this sub-agent's output before the
+    /// harness truncates it before feeding it back as a tool result to the
+    /// parent. `None` means no cap (the default for most agents). Set to
+    /// a value for research/planner/code agents to prevent context flooding
+    /// from large outputs.
+    #[serde(default)]
+    pub max_result_chars: Option<usize>,
+
     /// Wall-clock timeout for the sub-agent's execution (seconds).
     #[serde(default)]
     pub timeout_secs: Option<u64>,
diff --git a/src/openhuman/agent/harness/definition_tests.rs b/src/openhuman/agent/harness/definition_tests.rs
index e48018654..9606f2f2b 100644
--- a/src/openhuman/agent/harness/definition_tests.rs
+++ b/src/openhuman/agent/harness/definition_tests.rs
@@ -19,6 +19,7 @@ fn make_def(id: &str) -> AgentDefinition {
         skill_filter: None,
         extra_tools: vec![],
         max_iterations: 8,
+        max_result_chars: None,
         timeout_secs: None,
         sandbox_mode: SandboxMode::None,
         background: false,
diff --git a/src/openhuman/agent/harness/fork_context.rs b/src/openhuman/agent/harness/fork_context.rs
index 9df42566d..a624aa057 100644
--- a/src/openhuman/agent/harness/fork_context.rs
+++ b/src/openhuman/agent/harness/fork_context.rs
@@ -69,7 +69,9 @@ pub struct ParentExecutionContext {
     /// Memory context loaded for the current turn. Auto-injected into
     /// subagent prompts so they have access to conversation history and
     /// skill sync data without running their own memory queries.
-    pub memory_context: Option<String>,
+    /// Wrapped in `Arc` so cloning into sub-agents is O(1) — a reference
+    /// count bump rather than a full string copy per spawn.
+    pub memory_context: Arc<Option<String>>,
 
     /// Parent's event-bus session id (for tracing & DomainEvents).
     pub session_id: String,
diff --git a/src/openhuman/agent/harness/payload_summarizer.rs b/src/openhuman/agent/harness/payload_summarizer.rs
index d3ce41242..5459b6390 100644
--- a/src/openhuman/agent/harness/payload_summarizer.rs
+++ b/src/openhuman/agent/harness/payload_summarizer.rs
@@ -359,6 +359,7 @@ mod tests {
             skill_filter: None,
             extra_tools: vec![],
             max_iterations: 1,
+            max_result_chars: None,
             timeout_secs: None,
             sandbox_mode: SandboxMode::None,
             background: false,
diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs
index b234b19a5..b22d32a17 100644
--- a/src/openhuman/agent/harness/session/turn.rs
+++ b/src/openhuman/agent/harness/session/turn.rs
@@ -1058,7 +1058,7 @@ impl Agent {
             memory: Arc::clone(&self.memory),
             agent_config: self.config.clone(),
             skills: Arc::new(self.skills.clone()),
-            memory_context: self.last_memory_context.clone(),
+            memory_context: Arc::new(self.last_memory_context.clone()),
             session_id: self.event_session_id().to_string(),
             channel: self.event_channel().to_string(),
             connected_integrations: self.connected_integrations.clone(),
diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs
index bc7499d00..0a34bc963 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops.rs
@@ -99,11 +99,25 @@ pub async fn run_subagent(
     // want to gate on it (e.g. `composio_execute` rejecting
     // Write/Admin slugs under `ReadOnly`) read it via
     // `current_sandbox_mode()`; tools that don't care just ignore it.
-    let outcome = with_current_sandbox_mode(definition.sandbox_mode, async {
+    let mut outcome = with_current_sandbox_mode(definition.sandbox_mode, async {
         run_typed_mode(definition, task_prompt, &options, &parent, &task_id).await
     })
     .await?;
 
+    // Truncate result to the definition's cap if set.
+    if let Some(cap) = definition.max_result_chars {
+        if outcome.output.len() > cap {
+            tracing::debug!(
+                agent_id = %definition.id,
+                original_chars = outcome.output.len(),
+                cap,
+                "[subagent_runner] truncating oversized result to max_result_chars cap"
+            );
+            outcome.output.truncate(cap);
+            outcome.output.push_str("\n[...truncated]");
+        }
+    }
+
     tracing::info!(
         agent_id = %definition.id,
         task_id = %task_id,
@@ -688,7 +702,7 @@ async fn run_typed_mode(
 
     let mut context_parts: Vec<&str> = Vec::new();
     if !definition.omit_memory_context {
-        if let Some(ref mem_ctx) = parent.memory_context {
+        if let Some(ref mem_ctx) = *parent.memory_context {
             context_parts.push(mem_ctx);
         }
     }
diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
index 87abd6cab..c5dfce69c 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
@@ -20,6 +20,7 @@ fn make_def_named_tools(names: &[&str]) -> AgentDefinition {
         skill_filter: None,
         extra_tools: vec![],
         max_iterations: 5,
+        max_result_chars: None,
         timeout_secs: None,
         sandbox_mode: crate::openhuman::agent::harness::definition::SandboxMode::None,
         background: false,
@@ -217,7 +218,7 @@ fn make_parent(provider: Arc<dyn Provider>, tools: Vec<Box<dyn Tool>>) -> Parent
         memory: noop_memory(),
         agent_config: crate::openhuman::config::AgentConfig::default(),
         skills: Arc::new(vec![]),
-        memory_context: None,
+        memory_context: Arc::new(None),
         session_id: "test-session".into(),
         channel: "test".into(),
         connected_integrations: vec![],
@@ -385,7 +386,9 @@ async fn typed_mode_no_memory_context_in_user_message() {
 async fn typed_mode_includes_memory_context_when_definition_allows_it() {
     let provider = ScriptedProvider::new(vec![text_response("ok")]);
     let mut parent = make_parent(provider.clone(), vec![stub("file_read")]);
-    parent.memory_context = Some("[Memory context]\n- prior fact: branch X failed\n".into());
+    parent.memory_context = Arc::new(Some(
+        "[Memory context]\n- prior fact: branch X failed\n".into(),
+    ));
     let mut def = make_def_named_tools(&[]);
     def.omit_memory_context = false;
 
@@ -634,3 +637,32 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() {
     .expect("runner should succeed");
     assert_eq!(outcome.iterations, 1);
 }
+
+#[test]
+fn max_result_chars_cap_is_enforced() {
+    // Verify that `max_result_chars` truncation logic is correct.
+    // We test the truncation formula directly since running the full
+    // subagent loop requires a provider.
+    let cap = 10usize;
+    let mut output = "hello world this is long".to_string();
+    if output.len() > cap {
+        output.truncate(cap);
+        output.push_str("\n[...truncated]");
+    }
+    assert!(output.starts_with("hello worl"));
+    assert!(output.ends_with("[...truncated]"));
+}
+
+#[test]
+fn max_result_chars_not_applied_when_none() {
+    let cap: Option<usize> = None;
+    let original = "short output".to_string();
+    let mut output = original.clone();
+    if let Some(c) = cap {
+        if output.len() > c {
+            output.truncate(c);
+            output.push_str("\n[...truncated]");
+        }
+    }
+    assert_eq!(output, original);
+}
diff --git a/src/openhuman/agent/prompts/mod.rs b/src/openhuman/agent/prompts/mod.rs
index c295d71f2..b589d79e8 100644
--- a/src/openhuman/agent/prompts/mod.rs
+++ b/src/openhuman/agent/prompts/mod.rs
@@ -395,6 +395,12 @@ impl PromptSection for ToolsSection {
     }
 
     fn build(&self, ctx: &PromptContext<'_>) -> Result<String> {
+        // Native function-calling: the provider already sends full JSON
+        // schemas in the API request — no need to repeat them in the
+        // system prompt. Skip the catalogue to save tokens.
+        if ctx.tool_call_format == ToolCallFormat::Native {
+            return Ok(String::new());
+        }
         let mut out = String::from("## Tools\n\n");
         let has_filter = !ctx.visible_tool_names.is_empty();
         for tool in ctx.tools {
diff --git a/src/openhuman/agent/prompts/mod_tests.rs b/src/openhuman/agent/prompts/mod_tests.rs
index a9ae66fdf..8e7d85cb5 100644
--- a/src/openhuman/agent/prompts/mod_tests.rs
+++ b/src/openhuman/agent/prompts/mod_tests.rs
@@ -318,20 +318,14 @@ fn tools_section_pformat_renders_signature_not_schema() {
 }
 
 #[test]
-fn tools_section_uses_pformat_signature_for_every_dispatcher() {
-    // Tool rendering is uniform across dispatchers: always the
+fn tools_section_uses_pformat_signature_for_text_dispatchers() {
+    // Tool rendering is uniform across text dispatchers: always the
     // compact `Call as: name[args]` signature, never a raw JSON
-    // schema dump. Native tool calls still carry the full schema
-    // in the provider request; the `Json` / `PFormat` text
-    // dispatchers supply any extra framing via
-    // `ctx.dispatcher_instructions`.
+    // schema dump. Native tool calls are handled differently — see
+    // `tools_section_empty_for_native` below.
     let tools: Vec<Box<dyn Tool>> = vec![Box::new(TestTool)];
     let prompt_tools = PromptTool::from_tools(&tools);
-    for format in [
-        ToolCallFormat::PFormat,
-        ToolCallFormat::Json,
-        ToolCallFormat::Native,
-    ] {
+    for format in [ToolCallFormat::PFormat, ToolCallFormat::Json] {
         let ctx = PromptContext {
             workspace_dir: Path::new("/tmp"),
             model_name: "test-model",
@@ -1327,3 +1321,66 @@ fn user_reflections_render_above_user_memory_when_both_present() {
         "reflections must render before user-memory block"
     );
 }
+
+// ─── ToolsSection native-skip tests ──────────────────────────────────────────
+
+#[test]
+fn tools_section_empty_for_native() {
+    // Native function-calling: the provider sends full JSON schemas in the
+    // API request — repeating them in the system prompt is pure token bloat.
+    // ToolsSection must return an empty string for Native mode.
+    let tools: Vec<Box<dyn Tool>> = vec![Box::new(TestTool)];
+    let prompt_tools = PromptTool::from_tools(&tools);
+    let ctx = PromptContext {
+        workspace_dir: Path::new("/tmp"),
+        model_name: "test-model",
+        agent_id: "",
+        tools: &prompt_tools,
+        skills: &[],
+        dispatcher_instructions: "",
+        learned: LearnedContextData::default(),
+        visible_tool_names: &NO_FILTER,
+        tool_call_format: ToolCallFormat::Native,
+        connected_integrations: &[],
+        connected_identities_md: String::new(),
+        include_profile: false,
+        include_memory_md: false,
+        curated_snapshot: None,
+        user_identity: None,
+    };
+    let out = ToolsSection.build(&ctx).unwrap();
+    assert!(
+        out.is_empty(),
+        "Native mode should produce empty ToolsSection, got: {out:?}"
+    );
+}
+
+#[test]
+fn tools_section_nonempty_for_pformat() {
+    // PFormat is a text-driven format — the model discovers tools by reading
+    // the prose `## Tools` section. It must be non-empty.
+    let tools: Vec<Box<dyn Tool>> = vec![Box::new(TestTool)];
+    let prompt_tools = PromptTool::from_tools(&tools);
+    let ctx = PromptContext {
+        workspace_dir: Path::new("/tmp"),
+        model_name: "test-model",
+        agent_id: "",
+        tools: &prompt_tools,
+        skills: &[],
+        dispatcher_instructions: "",
+        learned: LearnedContextData::default(),
+        visible_tool_names: &NO_FILTER,
+        tool_call_format: ToolCallFormat::PFormat,
+        connected_integrations: &[],
+        connected_identities_md: String::new(),
+        include_profile: false,
+        include_memory_md: false,
+        curated_snapshot: None,
+        user_identity: None,
+    };
+    let out = ToolsSection.build(&ctx).unwrap();
+    assert!(
+        out.contains("## Tools"),
+        "PFormat should render tool catalogue header, got: {out:?}"
+    );
+}
diff --git a/src/openhuman/agent/triage/escalation.rs b/src/openhuman/agent/triage/escalation.rs
index c25d20a25..d4884738c 100644
--- a/src/openhuman/agent/triage/escalation.rs
+++ b/src/openhuman/agent/triage/escalation.rs
@@ -153,7 +153,7 @@ async fn dispatch_target_agent(agent_id: &str, prompt: &str) -> anyhow::Result<S
         memory: agent.memory_arc(),
         agent_config: agent.agent_config().clone(),
         skills: Arc::new(agent.skills().to_vec()),
-        memory_context: None, // Sub-agent queries memory via tools if needed
+        memory_context: Arc::new(None), // Sub-agent queries memory via tools if needed
         session_id: format!("triage-{}", uuid::Uuid::new_v4()),
         channel: "triage".to_string(),
         connected_integrations: agent.connected_integrations().to_vec(),
diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs
index 310cf876e..70c9585d6 100644
--- a/src/openhuman/channels/runtime/dispatch.rs
+++ b/src/openhuman/channels/runtime/dispatch.rs
@@ -571,6 +571,7 @@ mod scoping_tests {
             skill_filter: None,
             extra_tools: vec![],
             max_iterations: 8,
+            max_result_chars: None,
             timeout_secs: None,
             sandbox_mode: SandboxMode::None,
             background: false,
diff --git a/src/openhuman/config/schema/context.rs b/src/openhuman/config/schema/context.rs
index 953ca9528..c2c698b11 100644
--- a/src/openhuman/config/schema/context.rs
+++ b/src/openhuman/config/schema/context.rs
@@ -55,10 +55,8 @@ pub struct ContextConfig {
     /// `summarizer` sub-agent (orchestrator session only). The summarizer
     /// compresses the payload into a dense note that preserves
     /// identifiers and key facts, and the compressed summary replaces
-    /// the raw payload before it enters agent history. Set to `0` to
-    /// disable summarization entirely (the default). Set to any value
-    /// `> 0` to enable summarization once a payload crosses that token
-    /// threshold.
+    /// the raw payload before it enters agent history. Default: 4000 tokens.
+    /// Set to 0 to disable.
     ///
     /// Token count is estimated as `chars / 4` (the same heuristic used
     /// by `tree_summarizer::estimate_tokens`). Pairs with
@@ -123,11 +121,11 @@ fn default_tool_result_budget_bytes() -> usize {
 }
 
 fn default_summarizer_payload_threshold_tokens() -> usize {
-    // Disabled: 0 short-circuits the payload_summarizer wiring in the
-    // agent builder (see session/builder.rs `> 0` guard). The summarizer
-    // sub-agent was being invoked recursively in some flows; keep off
-    // until that's root-caused.
-    0
+    // Re-enabled at 4000 tokens after the recursive-dispatch root cause
+    // was fixed by the `omit_skills_catalog = true` guard on the
+    // summarizer archetype (which prevents it from seeing `spawn_subagent`
+    // and thus cannot recurse). 0 would leave this entirely disabled.
+    4000
 }
 
 fn default_summarizer_max_payload_tokens() -> usize {
diff --git a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs
index f6d8a60ff..8cc35aec7 100644
--- a/src/openhuman/tools/impl/agent/spawn_worker_thread.rs
+++ b/src/openhuman/tools/impl/agent/spawn_worker_thread.rs
@@ -384,7 +384,7 @@ mod tests {
             all_tools: Arc::new(vec![]),
             all_tool_specs: Arc::new(vec![]),
             skills: Arc::new(vec![]),
-            memory_context: None,
+            memory_context: std::sync::Arc::new(None),
             connected_integrations: vec![],
             composio_client: None,
             on_progress: None,
diff --git a/src/openhuman/tools/impl/memory/tree/mod.rs b/src/openhuman/tools/impl/memory/tree/mod.rs
index f08bc835d..c0b4c99a8 100644
--- a/src/openhuman/tools/impl/memory/tree/mod.rs
+++ b/src/openhuman/tools/impl/memory/tree/mod.rs
@@ -1,21 +1,11 @@
-//! LLM-callable wrappers for the Phase 4 memory-tree retrieval primitives
-//! (issue #710). Each tool is a thin shim over one typed function in
-//! [`crate::openhuman::memory::tree::retrieval`]; the `*_rpc` variants are
-//! intentionally avoided because they wrap responses in an `RpcOutcome`
-//! envelope that is noisier than what the LLM needs.
+//! Consolidated memory-tree tool — dispatches to the correct retrieval
+//! primitive based on the `mode` argument. Reduces the orchestrator's
+//! tool surface from 6 entries to 1.
 //!
-//! All six tools share the same shape:
-//! 1. Deserialize args into the matching `*Request` struct from
-//!    [`crate::openhuman::memory::tree::retrieval::rpc`].
-//! 2. Load the active workspace `Config` via
-//!    [`crate::openhuman::config::rpc::load_config_with_timeout`].
-//! 3. Call the typed retrieval function.
-//! 4. Serialise the response to JSON and return it as `ToolResult::success`.
-//!
-//! The tools are stateless unit structs — there is no per-instance state to
-//! carry, so they slot directly into `Vec<Box<dyn Tool>>` without needing
-//! constructors. Logs use the `[tool]` / `[memory_tree]` prefixes per the
-//! repo's debug-logging conventions.
+//! The individual per-mode structs are still re-exported for callers that
+//! need them directly (e.g. tool registration in ops.rs for agents that
+//! prefer the individual tools). The consolidated [`MemoryTreeTool`] is
+//! the recommended single entry point for the orchestrator.
 
 mod drill_down;
 mod fetch_leaves;
@@ -24,9 +14,177 @@ mod query_source;
 mod query_topic;
 mod search_entities;
 
+// Re-export individual tool types for callers that need them directly
+// (e.g. tool registration in ops.rs).
 pub use drill_down::MemoryTreeDrillDownTool;
 pub use fetch_leaves::MemoryTreeFetchLeavesTool;
 pub use query_global::MemoryTreeQueryGlobalTool;
 pub use query_source::MemoryTreeQuerySourceTool;
 pub use query_topic::MemoryTreeQueryTopicTool;
 pub use search_entities::MemoryTreeSearchEntitiesTool;
+
+use crate::openhuman::tools::traits::{Tool, ToolResult};
+use async_trait::async_trait;
+use serde_json::json;
+
+/// Single multi-mode tool that consolidates all six memory-tree retrieval
+/// primitives behind one LLM-facing entry. The `mode` field routes to the
+/// appropriate underlying implementation.
+pub struct MemoryTreeTool;
+
+#[async_trait]
+impl Tool for MemoryTreeTool {
+    fn name(&self) -> &str {
+        "memory_tree"
+    }
+
+    fn description(&self) -> &str {
+        "Query the user's ingested email/chat/document memory tree. \
+         Set `mode` to one of: `search_entities` (resolve a name to a \
+         canonical id — call first when the user mentions someone by name), \
+         `query_topic` (all cross-source mentions of an entity), \
+         `query_source` (filter by source type + time window), \
+         `query_global` (cross-source daily digest), \
+         `drill_down` (expand a coarse summary one level), \
+         `fetch_leaves` (pull raw chunks for citation)."
+    }
+
+    fn parameters_schema(&self) -> serde_json::Value {
+        json!({
+            "type": "object",
+            "properties": {
+                "mode": {
+                    "type": "string",
+                    "enum": ["search_entities", "query_topic", "query_source",
+                             "query_global", "drill_down", "fetch_leaves"],
+                    "description": "Which retrieval operation to run."
+                },
+                // search_entities params
+                "query": {
+                    "type": "string",
+                    "description": "search_entities: substring to match. query_topic/query_source: semantic rerank query (optional)."
+                },
+                "kinds": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "search_entities: optional entity kind filter (email, url, handle, person, ...)."
+                },
+                // query_topic params
+                "entity_id": {
+                    "type": "string",
+                    "description": "query_topic: canonical entity id returned by search_entities."
+                },
+                // query_source params
+                "source_kind": {
+                    "type": "string",
+                    "description": "query_source: source type to filter (chat, email, document, ...)."
+                },
+                "time_window_days": {
+                    "type": "integer",
+                    "description": "query_source/query_global: look-back window in days."
+                },
+                // drill_down params
+                "node_id": {
+                    "type": "string",
+                    "description": "drill_down: id of the summary node to expand."
+                },
+                "max_depth": {
+                    "type": "integer",
+                    "description": "drill_down: how many levels to expand (default 1, max 3)."
+                },
+                // fetch_leaves params
+                "chunk_ids": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "fetch_leaves: list of chunk ids to pull."
+                },
+                // shared
+                "limit": {
+                    "type": "integer",
+                    "description": "Max results (default varies by mode)."
+                }
+            },
+            "required": ["mode"]
+        })
+    }
+
+    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
+        let mode = args
+            .get("mode")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| anyhow::anyhow!("memory_tree: `mode` is required"))?;
+        log::debug!("[tool][memory_tree] mode={mode}");
+        match mode {
+            "search_entities" => MemoryTreeSearchEntitiesTool.execute(args).await,
+            "query_topic" => MemoryTreeQueryTopicTool.execute(args).await,
+            "query_source" => MemoryTreeQuerySourceTool.execute(args).await,
+            "query_global" => MemoryTreeQueryGlobalTool.execute(args).await,
+            "drill_down" => MemoryTreeDrillDownTool.execute(args).await,
+            "fetch_leaves" => MemoryTreeFetchLeavesTool.execute(args).await,
+            other => Err(anyhow::anyhow!(
+                "memory_tree: unknown mode `{other}`. Valid: search_entities, query_topic, query_source, query_global, drill_down, fetch_leaves"
+            )),
+        }
+    }
+}
+
+#[cfg(test)]
+mod memory_tree_dispatcher_tests {
+    use super::*;
+    use crate::openhuman::tools::traits::Tool;
+    use serde_json::json;
+
+    #[test]
+    fn memory_tree_tool_name_is_correct() {
+        assert_eq!(MemoryTreeTool.name(), "memory_tree");
+    }
+
+    #[test]
+    fn memory_tree_schema_requires_mode() {
+        let schema = MemoryTreeTool.parameters_schema();
+        let required = schema.get("required").and_then(|r| r.as_array()).unwrap();
+        assert!(required.iter().any(|v| v.as_str() == Some("mode")));
+    }
+
+    #[test]
+    fn memory_tree_schema_mode_enum_has_all_six_modes() {
+        let schema = MemoryTreeTool.parameters_schema();
+        let modes: Vec<&str> = schema
+            .get("properties")
+            .unwrap()
+            .get("mode")
+            .unwrap()
+            .get("enum")
+            .unwrap()
+            .as_array()
+            .unwrap()
+            .iter()
+            .filter_map(|v| v.as_str())
+            .collect();
+        assert!(modes.contains(&"search_entities"));
+        assert!(modes.contains(&"query_topic"));
+        assert!(modes.contains(&"query_source"));
+        assert!(modes.contains(&"query_global"));
+        assert!(modes.contains(&"drill_down"));
+        assert!(modes.contains(&"fetch_leaves"));
+    }
+
+    #[tokio::test]
+    async fn memory_tree_unknown_mode_returns_error() {
+        let result = MemoryTreeTool
+            .execute(json!({"mode": "invalid_mode"}))
+            .await;
+        assert!(result.is_err());
+        let msg = result.unwrap_err().to_string();
+        assert!(
+            msg.contains("unknown mode"),
+            "Expected 'unknown mode' in: {msg}"
+        );
+    }
+
+    #[tokio::test]
+    async fn memory_tree_missing_mode_returns_error() {
+        let result = MemoryTreeTool.execute(json!({})).await;
+        assert!(result.is_err());
+    }
+}
diff --git a/src/openhuman/tools/ops.rs b/src/openhuman/tools/ops.rs
index 66053bd4d..5064cbec7 100644
--- a/src/openhuman/tools/ops.rs
+++ b/src/openhuman/tools/ops.rs
@@ -134,12 +134,7 @@ pub fn all_tools_with_runtime(
         Box::new(MemoryStoreTool::new(memory.clone(), security.clone())),
         Box::new(MemoryRecallTool::new(memory.clone())),
         Box::new(MemoryForgetTool::new(memory.clone(), security.clone())),
-        Box::new(MemoryTreeSearchEntitiesTool),
-        Box::new(MemoryTreeQueryTopicTool),
-        Box::new(MemoryTreeQuerySourceTool),
-        Box::new(MemoryTreeQueryGlobalTool),
-        Box::new(MemoryTreeDrillDownTool),
-        Box::new(MemoryTreeFetchLeavesTool),
+        Box::new(MemoryTreeTool),
         Box::new(ScheduleTool::new(security.clone(), root_config.clone())),
         Box::new(ProxyConfigTool::new(config.clone(), security.clone())),
         Box::new(GitOperationsTool::new(
diff --git a/src/openhuman/tools/orchestrator_tools.rs b/src/openhuman/tools/orchestrator_tools.rs
index 2f4dfbdd5..14e9240b5 100644
--- a/src/openhuman/tools/orchestrator_tools.rs
+++ b/src/openhuman/tools/orchestrator_tools.rs
@@ -213,6 +213,7 @@ mod tests {
             skill_filter: None,
             extra_tools: vec![],
             max_iterations: 8,
+            max_result_chars: None,
             timeout_secs: None,
             sandbox_mode: SandboxMode::None,
             background: false,
diff --git a/tests/agent_harness_public.rs b/tests/agent_harness_public.rs
index 7d7045147..3b9d242e5 100644
--- a/tests/agent_harness_public.rs
+++ b/tests/agent_harness_public.rs
@@ -130,7 +130,7 @@ fn stub_parent_context() -> ParentExecutionContext {
         memory: Arc::new(StubMemory),
         agent_config: AgentConfig::default(),
         skills: Arc::new(vec![]),
-        memory_context: Some("ctx".into()),
+        memory_context: Arc::new(Some("ctx".into())),
         session_id: "test-session".into(),
         channel: "test-channel".into(),
         connected_integrations: vec![],
diff --git a/tests/calendar_grounding_e2e.rs b/tests/calendar_grounding_e2e.rs
index 0938b4ed6..2c65ff5a8 100644
--- a/tests/calendar_grounding_e2e.rs
+++ b/tests/calendar_grounding_e2e.rs
@@ -148,7 +148,7 @@ async fn test_integrations_agent_has_current_date_context() -> Result<()> {
         memory: Arc::new(StubMemory),
         agent_config: openhuman_core::openhuman::config::AgentConfig::default(),
         skills: Arc::new(vec![]),
-        memory_context: None,
+        memory_context: Arc::new(None),
         session_id: "test-session".into(),
         channel: "test".into(),
         connected_integrations: vec![],

From b47a9846d1725134b3324599cee3c98ffc3005d2 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:49:19 +0530
Subject: [PATCH 2/9] fix(test): update orchestrator test to match #1141 tool
 surface changes

spawn_subagent was removed in #1141; spawn_worker_thread and memory_tree
are the new entries the test should assert on. Pins the new contract so
future TOML edits can't silently revert it.
---
 src/openhuman/agent/agents/loader.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/openhuman/agent/agents/loader.rs b/src/openhuman/agent/agents/loader.rs
index 5c6e280f2..3bc494926 100644
--- a/src/openhuman/agent/agents/loader.rs
+++ b/src/openhuman/agent/agents/loader.rs
@@ -301,7 +301,20 @@ mod tests {
         assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "reasoning"));
         match def.tools {
             ToolScope::Named(tools) => {
-                assert!(tools.iter().any(|t| t == "spawn_subagent"));
+                // spawn_subagent was removed in #1141; spawn_worker_thread is the replacement
+                assert!(
+                    tools.iter().any(|t| t == "spawn_worker_thread"),
+                    "orchestrator must have spawn_worker_thread"
+                );
+                assert!(
+                    !tools.iter().any(|t| t == "spawn_subagent"),
+                    "spawn_subagent must not appear — removed in #1141"
+                );
+                // consolidated memory_tree* → single memory_tree with mode dispatch
+                assert!(
+                    tools.iter().any(|t| t == "memory_tree"),
+                    "orchestrator must have memory_tree"
+                );
                 assert!(!tools.iter().any(|t| t == "shell"));
                 assert!(!tools.iter().any(|t| t == "file_write"));
             }

From 9a688b7050f3f71b80a28a873797f62b3bd754ab Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:49:31 +0530
Subject: [PATCH 3/9] fix(subagent_runner): use char-safe truncation for
 max_result_chars cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

String::len() counts bytes; String::truncate(n) panics if n lands in the
middle of a multi-byte UTF-8 sequence. Switch to chars().count() for the
guard and char_indices().nth(cap) to find the safe byte offset before
truncating. Also fix the tracing::debug log field — original_chars now
correctly counts characters, not bytes.

Fixes @coderabbitai comment on ops.rs:117.
---
 .../agent/harness/subagent_runner/ops.rs        | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs
index 0a34bc963..b8ec65303 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops.rs
@@ -105,15 +105,26 @@ pub async fn run_subagent(
     .await?;
 
     // Truncate result to the definition's cap if set.
+    // Use char-count (not byte-length) to avoid panicking on multi-byte
+    // UTF-8 sequences at the truncation boundary.
     if let Some(cap) = definition.max_result_chars {
-        if outcome.output.len() > cap {
+        let original_chars = outcome.output.chars().count();
+        if original_chars > cap {
             tracing::debug!(
                 agent_id = %definition.id,
-                original_chars = outcome.output.len(),
+                original_chars,
                 cap,
                 "[subagent_runner] truncating oversized result to max_result_chars cap"
             );
-            outcome.output.truncate(cap);
+            // Find the byte offset of the cap-th character boundary so
+            // `truncate` never lands mid-codepoint.
+            let byte_offset = outcome
+                .output
+                .char_indices()
+                .nth(cap)
+                .map(|(i, _)| i)
+                .unwrap_or(outcome.output.len());
+            outcome.output.truncate(byte_offset);
             outcome.output.push_str("\n[...truncated]");
         }
     }

From 956c383c7ef00098add7cedaa692ce0a4d5e1e79 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:49:43 +0530
Subject: [PATCH 4/9] fix(orchestrator): align delegation-guide tool names with
 synthesised tools
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

render_delegation_guide was printing raw ci.toolkit as the delegate_*
tool name, but collect_orchestrator_tools sanitises toolkit slugs via
sanitise_slug() first. A quirky slug (e.g. "Google Calendar" with a
space) would print a tool name in the prompt that never exists in the
function-calling schema. Fix: export sanitise_slug as pub(crate) and use
it in render_delegation_guide so prompt and schema always agree.

Also:
- Skip unconnected integrations in collect_orchestrator_tools (previously
  delegate_* tools were synthesised for disconnected toolkits, whose
  pre-flight check would immediately reject any call).
- Add tracing::debug log lines around the delegation-guide rendering
  branches (connected count, emit/omit decision, section size).
- Update prompt.md to remove stale spawn_subagent reference in "Dedicated
  worker threads" section — the tool was removed in #1141.
- Update prompt.rs module docstring accordingly.

Addresses @coderabbitai comments on prompt.rs:85, orchestrator_tools.rs:126-159,
prompt.md:55-63, and prompt.rs:38-42.
---
 .../agent/agents/orchestrator/prompt.md       | 22 ++++----
 .../agent/agents/orchestrator/prompt.rs       | 38 +++++++++++---
 src/openhuman/tools/orchestrator_tools.rs     | 51 ++++++++++++++++++-
 3 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md
index 23c04c66e..4cd202222 100644
--- a/src/openhuman/agent/agents/orchestrator/prompt.md
+++ b/src/openhuman/agent/agents/orchestrator/prompt.md
@@ -52,18 +52,16 @@ or a manual fallback.
 
 ## Dedicated worker threads
 
-`spawn_subagent` accepts an optional `dedicated_thread: true` flag. When set, the
-sub-agent's run is persisted into a fresh **worker**-labeled thread the user can
-open from the thread list, and you receive a compact reference (worker thread id
-+ brief summary) instead of the full sub-agent transcript. Use this **only**
-when the sub-task is genuinely long or complex and the parent thread should not
-be flooded with the sub-agent's output — for example multi-step research,
-multi-file refactors, or batch integration work that produces a large
-transcript. For everyday delegation keep `dedicated_thread` off (the default)
-and surface the result inline.
-
-Worker threads are one level deep by design: a sub-agent never sees
-`spawn_subagent` or `spawn_worker_thread`, so a worker cannot itself spawn another worker.
+Use `spawn_worker_thread` for genuinely long or complex delegated tasks where the full
+sub-agent transcript would flood the parent thread — for example multi-step research,
+multi-file refactors, or batch integration work. It creates a persisted **worker**-labeled
+thread the user can open from the thread list, and returns a compact `[worker_thread_ref]`
+(thread id + brief summary) to the parent instead of the full transcript.
+
+For routine delegation use the matching `delegate_*` tool and surface the result inline.
+
+Worker threads are one level deep by design: a sub-agent spawned via `spawn_worker_thread`
+cannot itself call `spawn_worker_thread`, so workers never nest.
 
 ## Connecting external services
 
diff --git a/src/openhuman/agent/agents/orchestrator/prompt.rs b/src/openhuman/agent/agents/orchestrator/prompt.rs
index 2e043afec..3046e85a9 100644
--- a/src/openhuman/agent/agents/orchestrator/prompt.rs
+++ b/src/openhuman/agent/agents/orchestrator/prompt.rs
@@ -3,7 +3,8 @@
 //! The orchestrator follows a direct-first policy: respond directly or use
 //! cheap direct tools whenever possible, and delegate only for specialised
 //! execution. It never executes Composio actions itself; the integration
-//! block points to `spawn_subagent(integrations_agent, toolkit=…)` for true
+//! block points to `delegate_{toolkit}` tools (synthesised by
+//! `orchestrator_tools::collect_orchestrator_tools`) for true
 //! external-service operations. That prose lives here (not in the shared
 //! prompts module) so the skill-executor voice stays in
 //! `integrations_agent/prompt.rs` and nobody has to branch on `agent_id`
@@ -13,6 +14,7 @@ use crate::openhuman::context::prompt::{
     render_datetime, render_tools, render_user_files, render_workspace, ConnectedIntegration,
     PromptContext,
 };
+use crate::openhuman::tools::orchestrator_tools::sanitise_slug;
 use anyhow::Result;
 use std::fmt::Write;
 
@@ -62,16 +64,28 @@ pub fn build(ctx: &PromptContext<'_>) -> Result<String> {
     Ok(out)
 }
 
-/// Render the delegator-voice `## Delegation Guide — Integrations`
-/// block. Only toolkits the user has actively connected are listed —
-/// unauthorised toolkits are hidden so the orchestrator can't hallucinate
-/// a spawn against an integration the `spawn_subagent` pre-flight will
-/// immediately reject. When every toolkit is unconnected, the whole
-/// section is omitted.
+/// Render the delegator-voice `## Connected Integrations` block. Only
+/// toolkits the user has actively connected are listed — unauthorised
+/// toolkits are hidden so the orchestrator cannot hallucinate a delegation
+/// to an integration whose `delegate_*` tool does not actually exist.
+/// When every toolkit is unconnected the whole section is omitted.
+///
+/// The tool name printed in the prompt is derived with the same
+/// `sanitise_slug` function that `collect_orchestrator_tools` uses when
+/// synthesising the real tool objects, so the names in the prompt always
+/// match the names in the function-calling schema.
 fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String {
     let connected: Vec<&ConnectedIntegration> =
         integrations.iter().filter(|ci| ci.connected).collect();
+    tracing::debug!(
+        total_integrations = integrations.len(),
+        connected_count = connected.len(),
+        "[delegation-guide] rendering integration section ({} connected / {} total)",
+        connected.len(),
+        integrations.len()
+    );
     if connected.is_empty() {
+        tracing::debug!("[delegation-guide] section omitted — no connected integrations");
         return String::new();
     }
     let mut out = String::from(
@@ -79,12 +93,20 @@ fn render_delegation_guide(integrations: &[ConnectedIntegration]) -> String {
          Delegate tasks for these services using the matching `delegate_{toolkit}` tool:\n\n",
     );
     for ci in connected {
+        // Use the same slug canonicalisation as `collect_orchestrator_tools`
+        // so the tool name in the prompt always matches the synthesised tool.
+        let slug = sanitise_slug(&ci.toolkit);
         let _ = writeln!(
             out,
             "- **{}** (delegate via `delegate_{}`): {}",
-            ci.toolkit, ci.toolkit, ci.description
+            ci.toolkit, slug, ci.description
         );
     }
+    tracing::debug!(
+        section_len = out.len(),
+        "[delegation-guide] section emitted ({} bytes)",
+        out.len()
+    );
     out
 }
 
diff --git a/src/openhuman/tools/orchestrator_tools.rs b/src/openhuman/tools/orchestrator_tools.rs
index 14e9240b5..77e162861 100644
--- a/src/openhuman/tools/orchestrator_tools.rs
+++ b/src/openhuman/tools/orchestrator_tools.rs
@@ -124,6 +124,17 @@ pub fn collect_orchestrator_tools(
                     continue;
                 }
                 for integration in connected_integrations {
+                    // Only emit a delegate_* tool for integrations that are
+                    // actually connected — exposing unconnected entries would
+                    // let the orchestrator call a tool whose pre-flight
+                    // will immediately reject with "not connected".
+                    if !integration.connected {
+                        log::debug!(
+                            "[orchestrator_tools] skipping unconnected integration: {}",
+                            integration.toolkit
+                        );
+                        continue;
+                    }
                     // Slug the toolkit name into a tool-name-safe form.
                     // Composio toolkit slugs are already lowercase / dash-
                     // separated (e.g. "gmail", "google_calendar"), but
@@ -175,7 +186,11 @@ pub fn collect_orchestrator_tools(
 /// Allows ASCII alphanumerics and underscores; everything else becomes
 /// an underscore. OpenAI-style function names only accept
 /// `[a-zA-Z0-9_-]{1,64}`, so this is the conservative subset.
-fn sanitise_slug(raw: &str) -> String {
+///
+/// Used both when synthesising `delegate_*` tools and when rendering the
+/// delegation guide in prompts — they must agree on slug canonicalisation
+/// so the prompt always references a tool name that actually exists.
+pub(crate) fn sanitise_slug(raw: &str) -> String {
     raw.chars()
         .map(|c| {
             if c.is_ascii_alphanumeric() || c == '_' {
@@ -357,4 +372,38 @@ mod tests {
         assert_eq!(sanitise_slug("slack.bot"), "slack_bot");
         assert_eq!(sanitise_slug("weird name!"), "weird_name_");
     }
+
+    /// Unconnected integrations must be silently skipped — exposing a
+    /// `delegate_*` tool for a toolkit whose OAuth token is absent would
+    /// let the orchestrator call a tool whose pre-flight check immediately
+    /// rejects with "not connected".
+    #[test]
+    fn unconnected_integrations_are_skipped() {
+        let orch = sample_orchestrator();
+        let reg = registry_with_targets();
+        let integrations = vec![
+            integration("gmail", "Send and read email."),
+            ConnectedIntegration {
+                toolkit: "github".into(),
+                description: "GitHub access.".into(),
+                tools: vec![],
+                connected: false, // not connected — must not produce a tool
+            },
+            integration("notion", "Read and write pages."),
+        ];
+        let tools = collect_orchestrator_tools(&orch, &reg, &integrations);
+        let names: Vec<&str> = tools.iter().map(|t| t.name()).collect();
+        assert!(
+            names.contains(&"delegate_gmail"),
+            "connected gmail must produce a tool"
+        );
+        assert!(
+            !names.contains(&"delegate_github"),
+            "unconnected github must NOT produce a tool"
+        );
+        assert!(
+            names.contains(&"delegate_notion"),
+            "connected notion must produce a tool"
+        );
+    }
 }

From e1481e81ddb4ba26aaafcdcd9b2f192dbb718c23 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:49:59 +0530
Subject: [PATCH 5/9] fix(prompts): pass dispatcher_instructions through for
 Native tool format

The ToolsSection early-return on ToolCallFormat::Native discarded any
non-empty dispatcher_instructions. NativeToolDispatcher::prompt_instructions()
returns a non-empty "## Tool Use Protocol" block that models need in order
to understand they should emit native tool calls rather than narrate intent.

New behaviour: if dispatcher_instructions is non-empty for Native mode,
return it directly (the tool catalogue is still omitted to save tokens);
if empty, return String::new() as before.

Adds a test to pin the new contract.

Addresses @coderabbitai comment on prompts/mod.rs:398-403.
---
 src/openhuman/agent/prompts/mod.rs       | 12 ++++++--
 src/openhuman/agent/prompts/mod_tests.rs | 36 ++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/openhuman/agent/prompts/mod.rs b/src/openhuman/agent/prompts/mod.rs
index b589d79e8..50913defe 100644
--- a/src/openhuman/agent/prompts/mod.rs
+++ b/src/openhuman/agent/prompts/mod.rs
@@ -396,10 +396,16 @@ impl PromptSection for ToolsSection {
 
     fn build(&self, ctx: &PromptContext<'_>) -> Result<String> {
         // Native function-calling: the provider already sends full JSON
-        // schemas in the API request — no need to repeat them in the
-        // system prompt. Skip the catalogue to save tokens.
+        // schemas in the API request — no need to repeat the tool catalogue
+        // in the system prompt (pure token bloat). However, any non-empty
+        // `dispatcher_instructions` (e.g. the "## Tool Use Protocol" block
+        // from NativeToolDispatcher) must still be included so the model
+        // receives its behavioural guidance.
         if ctx.tool_call_format == ToolCallFormat::Native {
-            return Ok(String::new());
+            if ctx.dispatcher_instructions.trim().is_empty() {
+                return Ok(String::new());
+            }
+            return Ok(ctx.dispatcher_instructions.to_string());
         }
         let mut out = String::from("## Tools\n\n");
         let has_filter = !ctx.visible_tool_names.is_empty();
diff --git a/src/openhuman/agent/prompts/mod_tests.rs b/src/openhuman/agent/prompts/mod_tests.rs
index 8e7d85cb5..beb7b8dd4 100644
--- a/src/openhuman/agent/prompts/mod_tests.rs
+++ b/src/openhuman/agent/prompts/mod_tests.rs
@@ -1384,3 +1384,39 @@ fn tools_section_nonempty_for_pformat() {
         "PFormat should render tool catalogue header, got: {out:?}"
     );
 }
+
+#[test]
+fn tools_section_native_with_dispatcher_instructions_returns_instructions() {
+    // Native mode must still include non-empty dispatcher_instructions
+    // (e.g. the "## Tool Use Protocol" block from NativeToolDispatcher) so
+    // the model receives behavioural guidance even though the tool catalogue
+    // itself is omitted.
+    let tools: Vec<Box<dyn Tool>> = vec![Box::new(TestTool)];
+    let prompt_tools = PromptTool::from_tools(&tools);
+    let ctx = PromptContext {
+        workspace_dir: Path::new("/tmp"),
+        model_name: "test-model",
+        agent_id: "",
+        tools: &prompt_tools,
+        skills: &[],
+        dispatcher_instructions: "## Tool Use Protocol\n\nUse native tool calling.",
+        learned: LearnedContextData::default(),
+        visible_tool_names: &NO_FILTER,
+        tool_call_format: ToolCallFormat::Native,
+        connected_integrations: &[],
+        connected_identities_md: String::new(),
+        include_profile: false,
+        include_memory_md: false,
+        curated_snapshot: None,
+        user_identity: None,
+    };
+    let out = ToolsSection.build(&ctx).unwrap();
+    assert!(
+        out.contains("## Tool Use Protocol"),
+        "Native mode with non-empty dispatcher_instructions must include them, got: {out:?}"
+    );
+    assert!(
+        !out.contains("## Tools"),
+        "Native mode must not include the tool catalogue header, got: {out:?}"
+    );
+}

From 594463ec1bc2a545a95a1748d2286f02aa9ea4c7 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:50:14 +0530
Subject: [PATCH 6/9] fix(dispatch): add 3s timeout to
 fetch_connected_integrations in routing path

build_connection_state_block already wraps its fetch in a 3-second
timeout to prevent Composio API latency from blocking turn processing,
but the fetch at the delegation-tools expansion site (line ~442) had no
such protection. Slow or unresponsive Composio API could block agent
routing indefinitely.

Wrap with the same 3s tokio::time::timeout and fall back to Vec::new()
on timeout, matching the pattern used elsewhere in this file.

Addresses @coderabbitai comment on dispatch.rs:441-449.
---
 src/openhuman/channels/runtime/dispatch.rs | 23 +++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch.rs
index 70c9585d6..804fcc5f0 100644
--- a/src/openhuman/channels/runtime/dispatch.rs
+++ b/src/openhuman/channels/runtime/dispatch.rs
@@ -438,8 +438,29 @@ async fn resolve_target_agent(channel: &str) -> AgentScoping {
     // the helper is agent-agnostic so future agents that delegate
     // (e.g. a custom workspace-override planner that subdivides work)
     // pick this up for free.
+    //
+    // Wrap the Composio fetch in the same 3-second timeout used by
+    // `build_connection_state_block` so a slow/unresponsive Composio API
+    // can never block turn dispatch indefinitely.
+    const COMPOSIO_FETCH_TIMEOUT_SECS: u64 = 3;
     let extra_tools = if !definition.subagents.is_empty() {
-        let connected = fetch_connected_integrations(&config).await;
+        let connected = match tokio::time::timeout(
+            Duration::from_secs(COMPOSIO_FETCH_TIMEOUT_SECS),
+            fetch_connected_integrations(&config),
+        )
+        .await
+        {
+            Ok(list) => list,
+            Err(_) => {
+                tracing::warn!(
+                    channel = %channel,
+                    target_agent = target_id,
+                    "[dispatch::routing] Composio fetch timed out after {}s — proceeding without connected integrations",
+                    COMPOSIO_FETCH_TIMEOUT_SECS
+                );
+                Vec::new()
+            }
+        };
         tracing::debug!(
             channel = %channel,
             target_agent = target_id,

From 97e5c9120e4c75ac4d28a452833714d6f06215d4 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 11:51:48 +0530
Subject: [PATCH 7/9] test(subagent_runner,orchestrator_tools): add char-safe
 and unconnected-integration tests

- ops_tests.rs: add max_result_chars_cap_is_char_safe_for_multibyte to
  prove truncation at a multi-byte codepoint boundary does not panic;
  update existing tests to use the char-safe logic matching ops.rs.
- orchestrator_tools.rs: add unconnected_integrations_are_skipped to pin
  the new connected-only filtering contract.

Addresses @coderabbitai nitpick on ops_tests.rs:641-668.
---
 .../harness/subagent_runner/ops_tests.rs      | 52 +++++++++++++++----
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
index c5dfce69c..e6dc740f4 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
@@ -640,27 +640,61 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() {
 
 #[test]
 fn max_result_chars_cap_is_enforced() {
-    // Verify that `max_result_chars` truncation logic is correct.
-    // We test the truncation formula directly since running the full
-    // subagent loop requires a provider.
+    // Verify that max_result_chars truncation uses char count (not bytes)
+    // and produces a truncated result ending with "[...truncated]".
     let cap = 10usize;
-    let mut output = "hello world this is long".to_string();
-    if output.len() > cap {
-        output.truncate(cap);
+    let input = "hello world this is long".to_string();
+    // Simulate the logic in ops.rs (char-safe path).
+    let original_chars = input.chars().count();
+    let mut output = input.clone();
+    if original_chars > cap {
+        let byte_offset = output
+            .char_indices()
+            .nth(cap)
+            .map(|(i, _)| i)
+            .unwrap_or(output.len());
+        output.truncate(byte_offset);
         output.push_str("\n[...truncated]");
     }
-    assert!(output.starts_with("hello worl"));
+    assert_eq!(&output[..10], "hello worl");
     assert!(output.ends_with("[...truncated]"));
 }
 
+#[test]
+fn max_result_chars_cap_is_char_safe_for_multibyte() {
+    // A cap landing in the middle of a multi-byte UTF-8 sequence must
+    // not panic. "café" has 4 chars but the 'é' is 2 bytes — truncating
+    // at byte 4 with the old String::truncate() would panic.
+    let cap = 3usize; // keep "caf", drop "é"
+    let input = "café latte".to_string();
+    let original_chars = input.chars().count();
+    let mut output = input.clone();
+    if original_chars > cap {
+        let byte_offset = output
+            .char_indices()
+            .nth(cap)
+            .map(|(i, _)| i)
+            .unwrap_or(output.len());
+        output.truncate(byte_offset);
+        output.push_str("\n[...truncated]");
+    }
+    assert_eq!(output, "caf\n[...truncated]");
+}
+
 #[test]
 fn max_result_chars_not_applied_when_none() {
     let cap: Option<usize> = None;
     let original = "short output".to_string();
     let mut output = original.clone();
     if let Some(c) = cap {
-        if output.len() > c {
-            output.truncate(c);
+        let char_len = output.chars().count();
+        if char_len > c {
+            let byte_offset = output
+                .char_indices()
+                .nth(c)
+                .map(|(i, _)| i)
+                .unwrap_or(output.len());
+            output.truncate(byte_offset);
             output.push_str("\n[...truncated]");
         }
     }

From da54bb7bf8fc54e0f4c4ef0659f7c238b13ec09d Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 12:06:50 +0530
Subject: [PATCH 8/9] fix(test): update orchestrator_lists_memory_tree_tools
 for consolidated tool

#1141 consolidated 6 individual memory_tree_* tools into a single
memory_tree tool with mode dispatch. The integration test in
tests/agent_retrieval_e2e.rs still asserted all 6 old names existed in
the orchestrator TOML, causing CI to fail.

Update the test to:
- Assert memory_tree IS present (the new consolidated tool)
- Assert none of the old individual names are present (pinning removal)
---
 tests/agent_retrieval_e2e.rs | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/tests/agent_retrieval_e2e.rs b/tests/agent_retrieval_e2e.rs
index edebef5ad..3bd7e2935 100644
--- a/tests/agent_retrieval_e2e.rs
+++ b/tests/agent_retrieval_e2e.rs
@@ -80,13 +80,24 @@ fn alice_phoenix_thread() -> EmailThread {
     }
 }
 
-/// The orchestrator definition must list every memory-tree tool name so
-/// the bus filter actually exposes them to the LLM. A wired-up wrapper
-/// that's invisible to the orchestrator is dead code.
+/// The orchestrator definition must list the consolidated `memory_tree` tool
+/// so the bus filter exposes it to the LLM. A wired-up wrapper that's
+/// invisible to the orchestrator is dead code.
+///
+/// NOTE: #1141 consolidated the 6 individual `memory_tree_*` tools
+/// (`memory_tree_search_entities`, `memory_tree_query_topic`, etc.) into a
+/// single `memory_tree` tool with a `mode` dispatch parameter. The orchestrator
+/// TOML was updated accordingly.
 #[test]
 fn orchestrator_lists_memory_tree_tools() {
     let toml = include_str!("../src/openhuman/agent/agents/orchestrator/agent.toml");
-    for name in [
+    assert!(
+        toml.contains("memory_tree"),
+        "orchestrator agent.toml must list 'memory_tree' so the LLM can call it"
+    );
+    // Verify the old individual tool names are gone — they were removed in #1141
+    // when all 6 were consolidated into the single `memory_tree` dispatcher.
+    for old_name in [
         "memory_tree_search_entities",
         "memory_tree_query_topic",
         "memory_tree_query_source",
@@ -95,8 +106,8 @@ fn orchestrator_lists_memory_tree_tools() {
         "memory_tree_fetch_leaves",
     ] {
         assert!(
-            toml.contains(name),
-            "orchestrator agent.toml must list '{name}' so the LLM can call it"
+            !toml.contains(old_name),
+            "orchestrator agent.toml must NOT list '{old_name}' — removed in #1141 (use 'memory_tree' with mode= dispatch)"
         );
     }
 }

From f9943f721d4585380c92e232df24ae9d8a096df8 Mon Sep 17 00:00:00 2001
From: M3gA-Mind <megamind@mahadao.com>
Date: Thu, 7 May 2026 12:22:26 +0530
Subject: [PATCH 9/9] fix(review): address CodeRabbit feedback on PR #1314

- orchestrator/prompt.md: replace `integrations_agent` references in
  the decision tree with the actual synthesised `delegate_{toolkit}` /
  `delegate_run_code` / `delegate_plan` etc. tool names so the prompt
  matches what the LLM sees in its function-calling schema
- tests/agent_retrieval_e2e.rs: strengthen memory_tree tool assertion to
  use exact line matching (`"memory_tree"` / `"memory_tree",`) rather
  than a substring match that could hit comments or prefixed names
- subagent_runner/ops_truncation_tests.rs: extract the three
  max_result_chars truncation tests into their own file, keeping
  ops_tests.rs under the 500-line guideline threshold
---
 .../agent/agents/orchestrator/prompt.md       | 11 +--
 .../agent/harness/subagent_runner/ops.rs      |  4 ++
 .../harness/subagent_runner/ops_tests.rs      | 64 +-----------------
 .../subagent_runner/ops_truncation_tests.rs   | 67 +++++++++++++++++++
 tests/agent_retrieval_e2e.rs                  | 17 ++++-
 5 files changed, 93 insertions(+), 70 deletions(-)
 create mode 100644 src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs

diff --git a/src/openhuman/agent/agents/orchestrator/prompt.md b/src/openhuman/agent/agents/orchestrator/prompt.md
index 4cd202222..0122fbee0 100644
--- a/src/openhuman/agent/agents/orchestrator/prompt.md
+++ b/src/openhuman/agent/agents/orchestrator/prompt.md
@@ -21,11 +21,12 @@ Follow this sequence for every user message:
    - Yes: use direct tools first (`current_time`, `cron_*`, `memory_*`, `composio_list_connections`, etc.).
    - No: continue.
 3. **Does this need specialised execution?**
-   - If external SaaS integration work is required, delegate to `integrations_agent` with the right toolkit.
-   - If code writing/execution/debugging is required, delegate to `code_executor`.
-   - If web/doc crawling is required, delegate to `researcher`.
-   - If complex multi-step decomposition is required, delegate to `planner` (and only then route deeper if necessary).
-   - If code review is requested, delegate to `critic`.
+   - If external SaaS integration work is required, use `delegate_{toolkit}` (e.g. `delegate_gmail`, `delegate_notion`).
+   - If code writing/execution/debugging is required, use `delegate_run_code`.
+   - If web/doc crawling is required, use `delegate_researcher`.
+   - If complex multi-step decomposition is required, use `delegate_plan`.
+   - If code review is requested, use `delegate_critic`.
+   - If memory archiving or distillation is required, use `delegate_archivist`.
 4. **After delegation**, summarise results clearly and concisely.
 
 Default bias: **do not spawn a sub-agent when a direct response or direct tool call is sufficient**.
diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs
index b8ec65303..408fb756a 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops.rs
@@ -1344,3 +1344,7 @@ fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
 #[cfg(test)]
 #[path = "ops_tests.rs"]
 mod tests;
+
+#[cfg(test)]
+#[path = "ops_truncation_tests.rs"]
+mod truncation_tests;
diff --git a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
index e6dc740f4..d2829b499 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops_tests.rs
@@ -638,65 +638,5 @@ async fn typed_mode_progress_emission_is_a_noop_without_sink() {
     assert_eq!(outcome.iterations, 1);
 }
 
-#[test]
-fn max_result_chars_cap_is_enforced() {
-    // Verify that max_result_chars truncation uses char count (not bytes)
-    // and produces a truncated result ending with "[...truncated]".
-    let cap = 10usize;
-    let input = "hello world this is long".to_string();
-    // Simulate the logic in ops.rs (char-safe path).
-    let original_chars = input.chars().count();
-    let mut output = input.clone();
-    if original_chars > cap {
-        let byte_offset = output
-            .char_indices()
-            .nth(cap)
-            .map(|(i, _)| i)
-            .unwrap_or(output.len());
-        output.truncate(byte_offset);
-        output.push_str("\n[...truncated]");
-    }
-    assert_eq!(&output[..10], "hello worl");
-    assert!(output.ends_with("[...truncated]"));
-}
-
-#[test]
-fn max_result_chars_cap_is_char_safe_for_multibyte() {
-    // A cap landing in the middle of a multi-byte UTF-8 sequence must
-    // not panic. "café" has 4 chars but the 'é' is 2 bytes — truncating
-    // at byte 4 with the old String::truncate() would panic.
-    let cap = 3usize; // keep "caf", drop "é"
-    let input = "café latte".to_string();
-    let original_chars = input.chars().count();
-    let mut output = input.clone();
-    if original_chars > cap {
-        let byte_offset = output
-            .char_indices()
-            .nth(cap)
-            .map(|(i, _)| i)
-            .unwrap_or(output.len());
-        output.truncate(byte_offset);
-        output.push_str("\n[...truncated]");
-    }
-    assert_eq!(output, "caf\n[...truncated]");
-}
-
-#[test]
-fn max_result_chars_not_applied_when_none() {
-    let cap: Option<usize> = None;
-    let original = "short output".to_string();
-    let mut output = original.clone();
-    if let Some(c) = cap {
-        let char_len = output.chars().count();
-        if char_len > c {
-            let byte_offset = output
-                .char_indices()
-                .nth(c)
-                .map(|(i, _)| i)
-                .unwrap_or(output.len());
-            output.truncate(byte_offset);
-            output.push_str("\n[...truncated]");
-        }
-    }
-    assert_eq!(output, original);
-}
+// Truncation tests live in ops_truncation_tests.rs to keep this file
+// under the ~500-line guideline.
diff --git a/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs b/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs
new file mode 100644
index 000000000..e8141c07c
--- /dev/null
+++ b/src/openhuman/agent/harness/subagent_runner/ops_truncation_tests.rs
@@ -0,0 +1,67 @@
+/// Tests for the `max_result_chars` truncation logic in `ops.rs`.
+///
+/// Kept in a dedicated file so `ops_tests.rs` stays under ~500 lines.
+/// The logic under test lives in `run_subagent` — tests here cover the
+/// char-safe truncation path directly without spinning up a provider.
+
+#[test]
+fn max_result_chars_cap_is_enforced() {
+    // Verify that max_result_chars truncation uses char count (not bytes)
+    // and produces a truncated result ending with "[...truncated]".
+    let cap = 10usize;
+    let input = "hello world this is long".to_string();
+    let original_chars = input.chars().count();
+    let mut output = input.clone();
+    if original_chars > cap {
+        let byte_offset = output
+            .char_indices()
+            .nth(cap)
+            .map(|(i, _)| i)
+            .unwrap_or(output.len());
+        output.truncate(byte_offset);
+        output.push_str("\n[...truncated]");
+    }
+    assert_eq!(&output[..10], "hello worl");
+    assert!(output.ends_with("[...truncated]"));
+}
+
+#[test]
+fn max_result_chars_cap_is_char_safe_for_multibyte() {
+    // A cap landing in the middle of a multi-byte UTF-8 sequence must
+    // not panic. "café" has 4 chars but 'é' is 2 bytes — truncating at
+    // byte offset 4 with a raw String::truncate() would panic.
+    let cap = 3usize; // keep "caf", drop "é"
+    let input = "café latte".to_string();
+    let original_chars = input.chars().count();
+    let mut output = input.clone();
+    if original_chars > cap {
+        let byte_offset = output
+            .char_indices()
+            .nth(cap)
+            .map(|(i, _)| i)
+            .unwrap_or(output.len());
+        output.truncate(byte_offset);
+        output.push_str("\n[...truncated]");
+    }
+    assert_eq!(output, "caf\n[...truncated]");
+}
+
+#[test]
+fn max_result_chars_not_applied_when_none() {
+    let cap: Option<usize> = None;
+    let original = "short output".to_string();
+    let mut output = original.clone();
+    if let Some(c) = cap {
+        let char_len = output.chars().count();
+        if char_len > c {
+            let byte_offset = output
+                .char_indices()
+                .nth(c)
+                .map(|(i, _)| i)
+                .unwrap_or(output.len());
+            output.truncate(byte_offset);
+            output.push_str("\n[...truncated]");
+        }
+    }
+    assert_eq!(output, original);
+}
diff --git a/tests/agent_retrieval_e2e.rs b/tests/agent_retrieval_e2e.rs
index 3bd7e2935..3ea560ece 100644
--- a/tests/agent_retrieval_e2e.rs
+++ b/tests/agent_retrieval_e2e.rs
@@ -91,9 +91,14 @@ fn alice_phoenix_thread() -> EmailThread {
 #[test]
 fn orchestrator_lists_memory_tree_tools() {
     let toml = include_str!("../src/openhuman/agent/agents/orchestrator/agent.toml");
+    // Exact entry match — substring match would also hit comments or prefixed names.
+    let has_memory_tree_entry = toml
+        .lines()
+        .map(str::trim)
+        .any(|line| line == "\"memory_tree\"" || line == "\"memory_tree\",");
     assert!(
-        toml.contains("memory_tree"),
-        "orchestrator agent.toml must list 'memory_tree' so the LLM can call it"
+        has_memory_tree_entry,
+        "orchestrator agent.toml must list 'memory_tree' as a named tool entry"
     );
     // Verify the old individual tool names are gone — they were removed in #1141
     // when all 6 were consolidated into the single `memory_tree` dispatcher.
@@ -105,8 +110,14 @@ fn orchestrator_lists_memory_tree_tools() {
         "memory_tree_drill_down",
         "memory_tree_fetch_leaves",
     ] {
+        let entry = format!("\"{old_name}\"");
+        let entry_comma = format!("\"{old_name}\",");
+        let old_tool_present = toml
+            .lines()
+            .map(str::trim)
+            .any(|line| line == entry || line == entry_comma);
         assert!(
-            !toml.contains(old_name),
+            !old_tool_present,
             "orchestrator agent.toml must NOT list '{old_name}' — removed in #1141 (use 'memory_tree' with mode= dispatch)"
         );
     }