From 1c9b80920ce01333af12c6cd910fc7ab1bc91c89 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 3 Mar 2026 10:37:45 +0000
Subject: [PATCH 01/10] Implement generic Memory Tool and Anthropic-native
 memory tool

---
 src/microbots/MicroBot.py                     |  41 ++-
 src/microbots/llm/anthropic_api.py            | 123 ++++++-
 .../tool_definitions/anthropic_memory_tool.py | 184 +++++++++++
 .../tools/tool_definitions/memory_tool.py     | 312 ++++++++++++++++++
 4 files changed, 647 insertions(+), 13 deletions(-)
 create mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py
 create mode 100644 src/microbots/tools/tool_definitions/memory_tool.py

diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py
index 3ace5a4..7b061d7 100644
--- a/src/microbots/MicroBot.py
+++ b/src/microbots/MicroBot.py
@@ -317,7 +317,38 @@ def _create_environment(self, folder_to_mount: Optional[Mount]):
             folder_to_mount=folder_to_mount,
         )
 
+    def _upgrade_tools_for_provider(self):
+        """Auto-upgrade provider-agnostic tools to their provider-optimised variants.
+
+        Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when
+        the provider is Anthropic so the model gets native structured tool-use
+        instead of the text-command loop.  ``memory_dir`` is always forwarded;
+        ``usage_instructions_to_llm`` only when customised (see comment below).
+        """
+        if self.model_provider != ModelProvider.ANTHROPIC:
+            return
+
+        # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths
+        from microbots.tools.tool_definitions.memory_tool import INSTRUCTIONS_TO_LLM, MemoryTool
+        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+        upgraded = []
+        for tool in self.additional_tools:
+            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
+                logger.info("🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider")
+                # The generic default documents `memory ...` text commands, which the
+                # native tool never services (is_invoked() is False); drop it so the
+                # native default applies, and forward only caller-customised text.
+                text = tool.usage_instructions_to_llm
+                text = None if text == INSTRUCTIONS_TO_LLM else text
+                upgraded.append(AnthropicMemoryTool(memory_dir=tool.memory_dir, usage_instructions=text))
+            else:
+                upgraded.append(tool)
+        self.additional_tools = upgraded
+
     def _create_llm(self):
+        self._upgrade_tools_for_provider()
+
         # Append tool usage instructions to system prompt
         system_prompt_with_tools = self.system_prompt if self.system_prompt else ""
         if self.additional_tools:
@@ -334,8 +365,16 @@ def _create_llm(self):
                 system_prompt=system_prompt_with_tools, model_name=self.deployment_name
             )
         elif self.model_provider == ModelProvider.ANTHROPIC:
+            # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing:
+            # any tool that exposes both to_dict() and call() is a native Anthropic tool.
+            native_tools = [
+                t for t in self.additional_tools
+                if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None))
+            ]
             self.llm = AnthropicApi(
-                system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name
+                system_prompt=system_prompt_with_tools,
+                deployment_name=self.deployment_name,
+                native_tools=native_tools or None,
             )
         # No Else case required as model provider is already validated using _validate_model_and_provider
 
diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index f40118a..4a73ba5 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -1,7 +1,9 @@
 import json
 import os
+import re
 from dataclasses import asdict
 from logging import getLogger
+from typing import List, Optional
 
 from dotenv import load_dotenv
 from anthropic import Anthropic
@@ -16,9 +18,31 @@
 api_key = os.getenv("ANTHROPIC_API_KEY")
 
 
+
 class AnthropicApi(LLMInterface):
 
-    def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3):
+    def __init__(
+        self,
+        system_prompt: str,
+        deployment_name: str = deployment_name,
+        max_retries: int = 3,
+        native_tools: Optional[List] = None,
+    ):
+        """
+        Parameters
+        ----------
+        system_prompt : str
+            System prompt for the LLM.
+        deployment_name : str
+            The Anthropic model deployment name.
+        max_retries : int
+            Maximum number of retries for invalid LLM responses.
+        native_tools : Optional[List]
+            Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that
+            have both a ``to_dict()`` and a ``call()`` method.  These are passed
+            directly to the API and their tool-use blocks are dispatched here
+            before the JSON response is returned to the caller.
+        """
         self.ai_client = Anthropic(
             api_key=api_key,
             base_url=endpoint
@@ -26,31 +50,106 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3
         self.deployment_name = deployment_name
         self.system_prompt = system_prompt
         self.messages = []
+        self.native_tools = native_tools or []
+        # Cache tool dicts once so _call_api and _dispatch_tool_use don't
+        # re-serialise on every invocation (important when multiple native
+        # tools are registered, e.g. memory + bash).
+        self._native_tool_dicts = [t.to_dict() for t in self.native_tools]
+        self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)}
 
         # Set these values here. This logic will be handled in the parent class.
         self.max_retries = max_retries
         self.retries = 0
 
-    def ask(self, message) -> LLMAskResponse:
+    # ---------------------------------------------------------------------- #
+    # Internal helpers
+    # ---------------------------------------------------------------------- #
+
+    def _call_api(self) -> object:
+        """Send the conversation to the Anthropic messages API, attaching native tools if any."""
+        request = {
+            "model": self.deployment_name,
+            "system": self.system_prompt,
+            "messages": self.messages,
+            "max_tokens": 4096,
+        }
+
+        if self.native_tools:
+            request["tools"] = self._native_tool_dicts
+
+        return self.ai_client.messages.create(**request)
+
+    def _dispatch_tool_use(self, response) -> None:
+        """Handle a tool_use response: execute each tool call and append results.
+
+        Mutates ``self.messages`` in place — appends the assistant turn (with
+        all content blocks) and the corresponding tool_result user turn.
+        Failed or unknown tool calls are reported back with ``is_error`` set.
+        """
+        # Append the full assistant message as-is (content is a list of blocks)
+        assistant_content = [block.model_dump() for block in response.content]
+        self.messages.append({"role": "assistant", "content": assistant_content})
+
+        # Build tool_result entries for every tool_use block
+        tool_results = []
+        for block in response.content:
+            if block.type != "tool_use":
+                continue
+
+            failed = True  # cleared only when the tool call returns normally
+            tool = self._native_tools_by_name.get(block.name)
+            if tool is None:
+                result_text = f"Error: unknown tool '{block.name}'"
+                logger.error("Received tool_use for unknown tool: %s", block.name)
+            else:
+                try:
+                    result_text = tool.call(block.input)
+                    failed = False
+                    logger.info(
+                        "🧠 Native tool '%s' executed. Result (first 200 chars): %s",
+                        block.name, str(result_text)[:200],
+                    )
+                except Exception as exc:
+                    result_text = f"Error executing tool '{block.name}': {exc}"
+                    logger.error("Native tool '%s' raised: %s", block.name, exc)
+
+            tool_results.append({
+                "type": "tool_result", "tool_use_id": block.id, "content": str(result_text),
+                **({"is_error": True} if failed else {}),  # key omitted on success
+            })
+
+        self.messages.append({"role": "user", "content": tool_results})
+
+    # ---------------------------------------------------------------------- #
+    # Public interface
+    # ---------------------------------------------------------------------- #
+
+    def ask(self, message: str) -> LLMAskResponse:
         self.retries = 0  # reset retries for each ask. Handled in parent class.
 
         self.messages.append({"role": "user", "content": message})
 
         valid = False
         while not valid:
-            response = self.ai_client.messages.create(
-                model=self.deployment_name,
-                system=self.system_prompt,
-                messages=self.messages,
-                max_tokens=4096,
-            )
-
-            # Extract text content from response
-            response_text = response.content[0].text if response.content else ""
+            response = self._call_api()
+
+            # Dispatch any tool_use rounds before looking for a JSON response.
+            # The model may call the memory tool multiple times before producing
+            # its final JSON command.
+            while response.stop_reason == "tool_use":
+                self._dispatch_tool_use(response)
+                response = self._call_api()
+
+            # Extract text content from the final response
+            response_text = ""
+            for block in response.content:
+                if block.type == "text":
+                    response_text = block.text
+                    break
+
             logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500])
 
             # Try to extract JSON if wrapped in markdown code blocks
-            import re
             json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
             if json_match:
                 response_text = json_match.group(1)
diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
new file mode 100644
index 0000000..e80750c
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -0,0 +1,184 @@
+"""
+AnthropicMemoryTool — wraps Anthropic's memory tool.
+
+The memory tool lets the model persist information across conversations by
+reading and writing files in a local memory directory.  When the model invokes
+the tool, it sends a command (view, create, str_replace, insert, delete,
+rename) and the client executes it against a local filesystem directory.
+
+This implementation extends both:
+  - ``MemoryTool``:  provides all file-operation logic (_resolve, _view,
+    _create, _str_replace, _insert, _delete, _rename, _clear) and the
+    ToolAbstract duck-typing interface.
+  - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and
+    the ``to_dict()`` / ``call()`` interface required by AnthropicApi.
+
+The SDK command-handler overrides (view, create, str_replace, insert, delete,
+rename) simply translate SDK command objects → arg lists and delegate to the
+inherited MemoryTool private methods, converting the CmdReturn back to a
+string as the SDK expects.
+
+The memory tool (type ``memory_20250818``) is available in the standard
+Anthropic library and does not require a beta endpoint or header.  Pass it
+via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular
+``client.messages.create(...)`` call.  ``AnthropicApi`` handles this
+automatically when ``native_tools`` contains an ``AnthropicMemoryTool``.
+
+Usage:
+    from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+    memory = AnthropicMemoryTool()
+    bot = ReadingBot(..., additional_tools=[memory])
+"""
+
+from __future__ import annotations
+
+from logging import getLogger
+from pathlib import Path
+
+from typing_extensions import override
+
+from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool
+from anthropic.types.beta import (
+    BetaMemoryTool20250818CreateCommand,
+    BetaMemoryTool20250818DeleteCommand,
+    BetaMemoryTool20250818InsertCommand,
+    BetaMemoryTool20250818RenameCommand,
+    BetaMemoryTool20250818StrReplaceCommand,
+    BetaMemoryTool20250818ViewCommand,
+)
+
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+
+logger = getLogger(__name__)
+
+DEFAULT_MEMORY_INSTRUCTIONS = (
+    "MEMORY PROTOCOL:\n"
+    "1. ALWAYS view your memory directory BEFORE doing anything else "
+    "using the `view` command of your `memory` tool to check for earlier progress.\n"
+    "2. As you make progress on the task, record status, progress, "
+    "and key findings in your memory using the memory tool.\n"
+    "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, "
+    "so you risk losing any progress that is not recorded in your memory directory.\n"
+    "4. Before completing a task, always save your final results and analysis to memory.\n"
+    "5. When editing your memory folder, always keep its content up-to-date, coherent "
+    "and organized. Rename or delete files that are no longer relevant. "
+    "Do not create new files unless necessary.\n\n"
+    "IMPORTANT: The memory tool ONLY works with paths under /memories/. "
+    "Do NOT use the memory tool to access the repository or workdir. "
+    "Use shell commands (ls, cat, etc.) for filesystem access."
+)
+
+
+class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool):
+    """
+    Anthropic's built-in memory tool, backed by MemoryTool's file logic.
+
+    Inherits file-operation logic from ``MemoryTool`` (plain Python class) and
+    the SDK's native dispatch interface from ``BetaAbstractMemoryTool``.
+
+    The SDK command-handler overrides delegate to the inherited private methods
+    (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to
+    the ``args: list`` format that those methods expect, and converting the
+    returned ``CmdReturn`` to the string that the SDK API requires.
+
+    Parameters
+    ----------
+    memory_dir : str | Path | None
+        Root directory for memory files.  Defaults to ``~/.microbots/memory``.
+    usage_instructions : str | None
+        Custom instructions appended to the system prompt for the LLM.
+        Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``.
+    """
+
+    def __init__(
+        self,
+        memory_dir: str | Path | None = None,
+        usage_instructions: str | None = None,
+    ) -> None:
+        MemoryTool.__init__(
+            self,
+            memory_dir=str(memory_dir) if memory_dir else None,
+            usage_instructions_to_llm=(
+                usage_instructions
+                if usage_instructions is not None
+                else DEFAULT_MEMORY_INSTRUCTIONS
+            ),
+        )
+        _SDKMemoryTool.__init__(self)  # type: ignore[call-arg]
+
+    # ---------------------------------------------------------------------- #
+    # ToolAbstract duck-typing overrides
+    # ---------------------------------------------------------------------- #
+
+    def is_model_supported(self, model_name: str) -> bool:
+        """Only Anthropic (Claude) models support the native memory tool."""
+        return "claude" in model_name.lower()
+
+    def is_invoked(self, command: str) -> bool:
+        """Return False — this tool is dispatched natively by AnthropicApi,
+        not via the shell command loop."""
+        return False
+
+    def clear_all(self) -> None:
+        """Delete all memory files (useful for testing or resetting state)."""
+        self._clear()
+        logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir)
+
+    # ---------------------------------------------------------------------- #
+    # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods
+    # ---------------------------------------------------------------------- #
+
+    @override
+    def clear_all_memory(self) -> str:
+        self.clear_all()
+        return "All memory cleared"
+
+    @override
+    def view(self, command: BetaMemoryTool20250818ViewCommand) -> str:
+        args = [command.path]
+        if command.view_range:
+            args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])]
+        result = self._view(args)
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
+
+    @override
+    def create(self, command: BetaMemoryTool20250818CreateCommand) -> str:
+        result = self._create([command.path, command.file_text])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"File created successfully at {command.path}"
+
+    @override
+    def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str:
+        result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"File {command.path} has been edited"
+
+    @override
+    def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str:
+        result = self._insert([
+            command.path,
+            "--line", str(command.insert_line),
+            "--text", command.insert_text,
+        ])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"Text inserted at line {command.insert_line} in {command.path}"
+
+    @override
+    def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str:
+        result = self._delete([command.path])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
+
+    @override
+    def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str:
+        result = self._rename([command.old_path, command.new_path])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
new file mode 100644
index 0000000..912fe6d
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -0,0 +1,312 @@
+import logging
+import shlex
+import shutil
+from pathlib import Path
+from typing import Optional
+
+from pydantic.dataclasses import dataclass, Field
+
+from microbots.environment.Environment import CmdReturn
+from microbots.tools.external_tool import ExternalTool
+
+logger = logging.getLogger(" 🧠 MemoryTool")
+
+INSTRUCTIONS_TO_LLM = """
+Use this tool to persist information to files across steps — same interface as
+the Anthropic memory tool.  All paths must be under /memories/.
+
+MEMORY PROTOCOL:
+1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for
+   earlier progress.
+2. Record status, findings and intermediate results as you go.
+3. Before completing a task, save your final results to memory.
+4. Keep the memory folder organised — rename or delete stale files.
+
+## Commands
+
+View a file or list a directory:
+  memory view <path>
+  memory view <path> --start <line> --end <line>
+
+Create a file:
+  memory create <path> <content>
+
+Replace a unique string in a file:
+  memory str_replace <path> --old "<old_text>" --new "<new_text>"
+
+Insert a line into a file (0 = prepend):
+  memory insert <path> --line <line_number> --text "<text>"
+
+Delete a file or directory:
+  memory delete <path>
+
+Rename / move a file:
+  memory rename <old_path> <new_path>
+
+Clear all memory:
+  memory clear
+
+## Examples
+
+  memory view /memories
+  memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42"
+  memory str_replace /memories/progress.md --old "line 42" --new "line 45"
+  memory insert /memories/progress.md --line 0 --text "# Task Notes"
+  memory view /memories/progress.md --start 1 --end 10
+  memory delete /memories/old_notes.md
+  memory rename /memories/draft.md /memories/final.md
+
+## Notes
+- Paths must start with /memories/.
+- memory create overwrites if the file already exists.
+- memory str_replace requires the old text to appear exactly once.
+"""
+
+
+@dataclass
+class MemoryTool(ExternalTool):
+    """
+    File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface
+    but dispatches through the text command loop (compatible with all providers).
+
+    Subclass of ``ExternalTool`` — all command lists are empty so
+    ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and
+    ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``.
+
+    All files are stored under ``memory_dir`` on the host (default
+    ``~/.microbots/memory``).  The LLM uses paths like ``/memories/notes.md``
+    which are resolved relative to ``memory_dir``.
+
+    Supported subcommands
+    ---------------------
+    memory view <path> [--start N] [--end N]
+    memory create <path> <content>
+    memory str_replace <path> --old <text> --new <text>
+    memory insert <path> --line N --text <text>
+    memory delete <path>
+    memory rename <old> <new>
+    memory clear
+    """
+
+    name: str = Field(default="memory")
+    description: str = Field(
+        default="File-backed memory store — view, create, edit, delete files under /memories/."
+    )
+    usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM)
+    memory_dir: Optional[str] = Field(default=None)
+
+    def __post_init__(self):
+        base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory"
+        self._memory_dir = base
+        self._memory_dir.mkdir(parents=True, exist_ok=True)
+
+    def is_model_supported(self, model_name: str) -> bool:
+        return True
+
+    # ---------------------------------------------------------------------- #
+    # Path helpers
+    # ---------------------------------------------------------------------- #
+
+    def _resolve(self, path: str) -> Path:
+        """Map an LLM-facing ``/memories/…`` path to an absolute host path.
+
+        Raises ``ValueError`` for container-style roots and for any path that
+        resolves outside ``memory_dir``.
+        """
+        root = self._memory_dir.resolve()
+        head, _, rel = path.lstrip("/").partition("/")
+        if head in ("workdir", "home", "tmp", "var", "etc", "usr"):
+            raise ValueError(f"Invalid memory path: {path!r}. Use paths under /memories/.")
+        if head != "memories":
+            rel = path.lstrip("/")  # treat as relative to memory_dir
+        resolved = (root / rel).resolve() if rel else root
+        # Compare path components, not string prefixes: a sibling such as
+        # "<memory_dir>_evil" shares the prefix but lies outside the root.
+        if resolved != root and root not in resolved.parents:
+            raise ValueError(f"Path traversal not allowed: {path!r}")
+        return resolved
+
+    # ---------------------------------------------------------------------- #
+    # ToolAbstract interface
+    # ---------------------------------------------------------------------- #
+
+    def is_invoked(self, command: str) -> bool:
+        return command.strip().startswith("memory ")
+
+    def invoke(self, command: str, parent_bot) -> CmdReturn:
+        """Parse a ``memory <subcommand> ...`` line and run the matching handler.
+
+        Never raises for bad input or filesystem failures: parse errors, unknown
+        subcommands and handler exceptions all come back as a ``CmdReturn``
+        with ``return_code=1`` and the reason in ``stderr``.
+        """
+        try:
+            tokens = shlex.split(command)
+        except ValueError as exc:
+            return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1)
+        if len(tokens) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory <subcommand> ...", return_code=1)
+
+        sub, args = tokens[1], tokens[2:]
+        handlers = {
+            "view": self._view,
+            "create": self._create,
+            "str_replace": self._str_replace,
+            "insert": self._insert,
+            "delete": self._delete,
+            "rename": self._rename,
+        }
+        try:
+            if sub == "clear":  # the only handler that takes no arguments
+                return self._clear()
+            if sub in handlers:
+                return handlers[sub](args)
+            return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1)
+        except (ValueError, OSError, RuntimeError) as exc:  # OSError: IsADirectoryError, PermissionError, ...
+            logger.error("🧠 MemoryTool error: %s", exc)
+            return CmdReturn(stdout="", stderr=str(exc), return_code=1)
+
+    # ---------------------------------------------------------------------- #
+    # Subcommand handlers
+    # ---------------------------------------------------------------------- #
+
+    def _view(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory view <path> [--start N] [--end N]", return_code=1)
+
+        path = args[0]
+        start_line = None
+        end_line = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--start" and i + 1 < len(args):
+                start_line = int(args[i + 1]); i += 2
+            elif args[i] == "--end" and i + 1 < len(args):
+                end_line = int(args[i + 1]); i += 2
+            else:
+                i += 1
+
+        resolved = self._resolve(path)
+        if not resolved.exists():
+            raise RuntimeError(f"Path not found: {path!r}")
+
+        if resolved.is_dir():
+            items = [
+                (f"{item.name}/" if item.is_dir() else item.name)
+                for item in sorted(resolved.iterdir())
+                if not item.name.startswith(".")
+            ]
+            result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items)
+            return CmdReturn(stdout=result, stderr="", return_code=0)
+
+        lines = resolved.read_text(encoding="utf-8").splitlines()
+        if start_line is not None or end_line is not None:
+            s = max(0, (start_line or 1) - 1)
+            e = len(lines) if (end_line is None or end_line == -1) else end_line
+            lines = lines[s:e]
+            base_num = s + 1
+        else:
+            base_num = 1
+        numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines))
+        return CmdReturn(stdout=numbered, stderr="", return_code=0)
+
+    def _create(self, args: list) -> CmdReturn:
+        if len(args) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory create <path> <content>", return_code=1)
+        path, content = args[0], args[1]
+        resolved = self._resolve(path)
+        resolved.parent.mkdir(parents=True, exist_ok=True)
+        resolved.write_text(content, encoding="utf-8")
+        logger.info("🧠 Memory file created: %s", path)
+        return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0)
+
+    def _str_replace(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory str_replace <path> --old <text> --new <text>", return_code=1)
+        path = args[0]
+        old_text = new_text = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--old" and i + 1 < len(args):
+                old_text = args[i + 1]; i += 2
+            elif args[i] == "--new" and i + 1 < len(args):
+                new_text = args[i + 1]; i += 2
+            else:
+                i += 1
+        if old_text is None or new_text is None:
+            return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
+        resolved = self._resolve(path)
+        if not resolved.is_file():
+            raise FileNotFoundError(f"File not found: {path!r}")
+        content = resolved.read_text(encoding="utf-8")
+        count = content.count(old_text)
+        if count == 0:
+            raise ValueError(f"Text not found in {path!r}")
+        if count > 1:
+            raise ValueError(f"Text appears {count} times in {path!r} — must be unique")
+        resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8")
+        return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0)
+
+    def _insert(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory insert <path> --line N --text <text>", return_code=1)
+        path = args[0]
+        line_num = text = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--line" and i + 1 < len(args):
+                line_num = int(args[i + 1]); i += 2
+            elif args[i] == "--text" and i + 1 < len(args):
+                text = args[i + 1]; i += 2
+            else:
+                i += 1
+        if line_num is None or text is None:
+            return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
+        resolved = self._resolve(path)
+        if not resolved.is_file():
+            raise FileNotFoundError(f"File not found: {path!r}")
+        lines = resolved.read_text(encoding="utf-8").splitlines()
+        if line_num < 0 or line_num > len(lines):
+            raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.")
+        lines.insert(line_num, text.rstrip("\n"))
+        resolved.write_text("\n".join(lines) + "\n", encoding="utf-8")
+        return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0)
+
+    def _delete(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory delete <path>", return_code=1)
+        path = args[0]
+        if path.rstrip("/") in ("/memories", "memories", ""):
+            raise ValueError("Cannot delete the /memories root directory")
+        resolved = self._resolve(path)
+        if resolved.is_file():
+            resolved.unlink()
+            logger.info("🧠 Memory file deleted: %s", path)
+            return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0)
+        if resolved.is_dir():
+            shutil.rmtree(resolved)
+            logger.info("🧠 Memory directory deleted: %s", path)
+            return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0)
+        raise FileNotFoundError(f"Path not found: {path!r}")
+
+    def _rename(self, args: list) -> CmdReturn:
+        if len(args) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory rename <old_path> <new_path>", return_code=1)
+        old_path, new_path = args[0], args[1]
+        old_resolved = self._resolve(old_path)
+        new_resolved = self._resolve(new_path)
+        if not old_resolved.exists():
+            raise FileNotFoundError(f"Source not found: {old_path!r}")
+        if new_resolved.exists():
+            raise ValueError(f"Destination already exists: {new_path!r}")
+        new_resolved.parent.mkdir(parents=True, exist_ok=True)
+        old_resolved.rename(new_resolved)
+        logger.info("🧠 Memory renamed: %s → %s", old_path, new_path)
+        return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0)
+
+    def _clear(self) -> CmdReturn:  # wipe all memory files, leaving an empty root
+        if self._memory_dir.exists():
+            shutil.rmtree(self._memory_dir)
+        self._memory_dir.mkdir(parents=True, exist_ok=True)  # recreate even if it was missing
+        logger.info("🧠 Memory cleared.")
+        return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0)

From 315cc03ad359947624f5ebdbc09efed0af094ae4 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 3 Mar 2026 10:51:30 +0000
Subject: [PATCH 02/10] Refactor MemoryTool error handling to return CmdReturn
 instead of raising exceptions and add tests

---
 .../tools/tool_definitions/memory_tool.py     |  20 +-
 test/llm/test_anthropic_api.py                | 367 ++++++++++++++
 .../tool_definitions/test_memory_tool.py      | 449 ++++++++++++++++++
 3 files changed, 826 insertions(+), 10 deletions(-)
 create mode 100644 test/tools/tool_definitions/test_memory_tool.py

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 912fe6d..310f880 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -188,7 +188,7 @@ def _view(self, args: list) -> CmdReturn:
 
         resolved = self._resolve(path)
         if not resolved.exists():
-            raise RuntimeError(f"Path not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
 
         if resolved.is_dir():
             items = [
@@ -237,13 +237,13 @@ def _str_replace(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
         resolved = self._resolve(path)
         if not resolved.is_file():
-            raise FileNotFoundError(f"File not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
         content = resolved.read_text(encoding="utf-8")
         count = content.count(old_text)
         if count == 0:
-            raise ValueError(f"Text not found in {path!r}")
+            return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1)
         if count > 1:
-            raise ValueError(f"Text appears {count} times in {path!r} — must be unique")
+            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1)
         resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8")
         return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0)
 
@@ -264,10 +264,10 @@ def _insert(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
         resolved = self._resolve(path)
         if not resolved.is_file():
-            raise FileNotFoundError(f"File not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
         lines = resolved.read_text(encoding="utf-8").splitlines()
         if line_num < 0 or line_num > len(lines):
-            raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.")
+            return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1)
         lines.insert(line_num, text.rstrip("\n"))
         resolved.write_text("\n".join(lines) + "\n", encoding="utf-8")
         return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0)
@@ -277,7 +277,7 @@ def _delete(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="Usage: memory delete <path>", return_code=1)
         path = args[0]
         if path.rstrip("/") in ("/memories", "memories", ""):
-            raise ValueError("Cannot delete the /memories root directory")
+            return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
         resolved = self._resolve(path)
         if resolved.is_file():
             resolved.unlink()
@@ -287,7 +287,7 @@ def _delete(self, args: list) -> CmdReturn:
             shutil.rmtree(resolved)
             logger.info("🧠 Memory directory deleted: %s", path)
             return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0)
-        raise FileNotFoundError(f"Path not found: {path!r}")
+        return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
 
     def _rename(self, args: list) -> CmdReturn:
         if len(args) < 2:
@@ -296,9 +296,9 @@ def _rename(self, args: list) -> CmdReturn:
         old_resolved = self._resolve(old_path)
         new_resolved = self._resolve(new_path)
         if not old_resolved.exists():
-            raise FileNotFoundError(f"Source not found: {old_path!r}")
+            return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1)
         if new_resolved.exists():
-            raise ValueError(f"Destination already exists: {new_path!r}")
+            return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1)
         new_resolved.parent.mkdir(parents=True, exist_ok=True)
         old_resolved.rename(new_resolved)
         logger.info("🧠 Memory renamed: %s → %s", old_path, new_path)
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 674294c..49674aa 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -98,7 +98,9 @@ def test_ask_successful_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo 'hello'",
@@ -133,7 +135,9 @@ def test_ask_with_task_done_true(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": True,
             "command": "",
@@ -157,12 +161,16 @@ def test_ask_with_retry_on_invalid_response(self):
 
         # Mock the Anthropic client to return invalid then valid response
         mock_invalid_response = Mock()
+        mock_invalid_response.stop_reason = "end_turn"
         mock_invalid_content = Mock()
+        mock_invalid_content.type = "text"
         mock_invalid_content.text = "invalid json"
         mock_invalid_response.content = [mock_invalid_content]
 
         mock_valid_response = Mock()
+        mock_valid_response.stop_reason = "end_turn"
         mock_valid_content = Mock()
+        mock_valid_content.type = "text"
         mock_valid_content.text = json.dumps({
             "task_done": False,
             "command": "ls -la",
@@ -193,7 +201,9 @@ def test_ask_appends_user_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
@@ -218,7 +228,9 @@ def test_ask_appends_assistant_response_as_json(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo test",
@@ -247,7 +259,9 @@ def test_ask_uses_asdict_for_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         response_dict = {
             "task_done": True,
             "command": "",
@@ -277,7 +291,9 @@ def test_ask_resets_retries_to_zero(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "ls",
@@ -299,7 +315,9 @@ def test_ask_extracts_json_from_markdown(self):
 
         # Mock response with markdown-wrapped JSON
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = """Here's the response:
 ```json
 {
@@ -420,7 +438,9 @@ def test_ask_with_empty_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo ''",
@@ -443,7 +463,9 @@ def test_multiple_ask_calls_append_messages(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
@@ -513,6 +535,351 @@ def test_anthropic_api_clear_history_integration(self):
         assert len(api.messages) == 0  # Anthropic doesn't store system in messages
 
 
+# ============================================================================
+# Tests for native_tools support: init caching, _call_api tools kwarg, tool_use dispatch, ask() loop
+# ============================================================================
+
+@pytest.mark.unit
+class TestAnthropicApiNativeToolsInit:
+    """Tests for __init__ native_tools caching."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    def test_init_without_native_tools_has_empty_caches(self):
+        api = AnthropicApi(system_prompt="test")
+
+        assert api.native_tools == []
+        assert api._native_tool_dicts == []
+        assert api._native_tools_by_name == {}
+
+    def test_init_with_none_native_tools_has_empty_caches(self):
+        api = AnthropicApi(system_prompt="test", native_tools=None)
+
+        assert api._native_tool_dicts == []
+        assert api._native_tools_by_name == {}
+
+    def test_init_with_single_native_tool_caches_dict(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
+
+    def test_init_with_single_native_tool_caches_by_name(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert "memory" in api._native_tools_by_name
+        assert api._native_tools_by_name["memory"] is tool
+
+    def test_init_with_multiple_native_tools_caches_all(self):
+        tool1 = Mock()
+        tool1.to_dict.return_value = {"name": "memory"}
+        tool2 = Mock()
+        tool2.to_dict.return_value = {"name": "bash"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+
+        assert len(api._native_tool_dicts) == 2
+        assert api._native_tools_by_name["memory"] is tool1
+        assert api._native_tools_by_name["bash"] is tool2
+
+    def test_init_calls_to_dict_exactly_once_per_tool(self):
+        """to_dict() must not be called again on subsequent API calls."""
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+
+        AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert tool.to_dict.call_count == 1
+
+
+@pytest.mark.unit
+class TestAnthropicApiCallApiWithTools:
+    """Tests for _call_api including/excluding the tools kwarg."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    def test_call_api_without_tools_omits_tools_kwarg(self):
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        api._call_api()
+
+        call_kwargs = api.ai_client.messages.create.call_args[1]
+        assert "tools" not in call_kwargs
+
+    def test_call_api_with_tools_passes_cached_dicts(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        api._call_api()
+
+        call_kwargs = api.ai_client.messages.create.call_args[1]
+        assert "tools" in call_kwargs
+        assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}]
+
+    def test_call_api_does_not_call_to_dict_again(self):
+        """to_dict() should only be called during __init__, never during _call_api."""
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        count_after_init = tool.to_dict.call_count  # should be 1
+        api._call_api()
+        api._call_api()
+
+        assert tool.to_dict.call_count == count_after_init  # no increase
+
+
+@pytest.mark.unit
+class TestAnthropicApiDispatchToolUse:
+    """Tests for _dispatch_tool_use."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    # ------------------------------------------------------------------ #
+    # Helpers
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _tool_use_block(name, tool_id="tu_001", input_data=None):
+        block = Mock()
+        block.type = "tool_use"
+        block.name = name
+        block.id = tool_id
+        block.input = input_data or {}
+        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name}
+        return block
+
+    @staticmethod
+    def _text_block(text="hello"):
+        block = Mock()
+        block.type = "text"
+        block.text = text
+        block.model_dump.return_value = {"type": "text", "text": text}
+        return block
+
+    # ------------------------------------------------------------------ #
+    # Tests
+    # ------------------------------------------------------------------ #
+
+    def test_dispatch_appends_assistant_message_first(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory")]
+        api._dispatch_tool_use(response)
+
+        assert api.messages[0]["role"] == "assistant"
+
+    def test_dispatch_appends_tool_result_user_message(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "file listing"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory", tool_id="tu_abc")]
+        api._dispatch_tool_use(response)
+
+        user_msg = api.messages[1]
+        assert user_msg["role"] == "user"
+        assert user_msg["content"][0]["type"] == "tool_result"
+        assert user_msg["content"][0]["tool_use_id"] == "tu_abc"
+        assert user_msg["content"][0]["content"] == "file listing"
+
+    def test_dispatch_calls_tool_with_correct_input(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        input_data = {"command": "view", "path": "/memories"}
+        response = Mock()
+        response.content = [self._tool_use_block("memory", input_data=input_data)]
+        api._dispatch_tool_use(response)
+
+        tool.call.assert_called_once_with(input_data)
+
+    def test_dispatch_unknown_tool_returns_error_in_result(self):
+        api = AnthropicApi(system_prompt="test")  # no native tools
+
+        response = Mock()
+        response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")]
+        api._dispatch_tool_use(response)
+
+        content = api.messages[1]["content"][0]["content"]
+        assert "Error" in content
+        assert "unknown_tool" in content
+
+    def test_dispatch_tool_exception_returns_error_message(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.side_effect = RuntimeError("disk full")
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory", tool_id="tu_exc")]
+        api._dispatch_tool_use(response)
+
+        content = api.messages[1]["content"][0]["content"]
+        assert "Error" in content
+        assert "disk full" in content
+
+    def test_dispatch_skips_non_tool_use_content_blocks(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [
+            self._text_block("thinking..."),
+            self._tool_use_block("memory", tool_id="tu_only"),
+        ]
+        api._dispatch_tool_use(response)
+
+        tool_results = api.messages[1]["content"]
+        assert len(tool_results) == 1
+        assert tool_results[0]["tool_use_id"] == "tu_only"
+
+    def test_dispatch_handles_multiple_tool_use_blocks(self):
+        tool1 = Mock()
+        tool1.to_dict.return_value = {"name": "memory"}
+        tool1.call.return_value = "memory result"
+        tool2 = Mock()
+        tool2.to_dict.return_value = {"name": "bash"}
+        tool2.call.return_value = "bash result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+
+        response = Mock()
+        response.content = [
+            self._tool_use_block("memory", tool_id="id_1"),
+            self._tool_use_block("bash", tool_id="id_2"),
+        ]
+        api._dispatch_tool_use(response)
+
+        results = api.messages[1]["content"]
+        assert len(results) == 2
+        assert results[0]["tool_use_id"] == "id_1"
+        assert results[0]["content"] == "memory result"
+        assert results[1]["tool_use_id"] == "id_2"
+        assert results[1]["content"] == "bash result"
+
+
+@pytest.mark.unit
+class TestAnthropicApiAskWithToolUseLoop:
+    """Tests for ask() cycling through tool_use rounds before returning JSON."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    @staticmethod
+    def _tool_use_response(tool_name, tool_id):
+        block = Mock()
+        block.type = "tool_use"
+        block.name = tool_name
+        block.id = tool_id
+        block.input = {}
+        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name}
+        response = Mock()
+        response.stop_reason = "tool_use"
+        response.content = [block]
+        return response
+
+    @staticmethod
+    def _text_response(json_dict):
+        block = Mock()
+        block.type = "text"
+        block.text = json.dumps(json_dict)
+        block.model_dump.return_value = {"type": "text", "text": block.text}
+        response = Mock()
+        response.stop_reason = "end_turn"
+        response.content = [block]
+        return response
+
+    def test_ask_dispatches_one_tool_use_round_then_returns(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "viewed /memories"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        result = api.ask("do the task")
+
+        assert api.ai_client.messages.create.call_count == 2
+        tool.call.assert_called_once()
+        assert result.command == "ls /"
+
+    def test_ask_dispatches_multiple_tool_use_rounds(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp1 = self._tool_use_response("memory", "tu_1")
+        tool_resp2 = self._tool_use_response("memory", "tu_2")
+        final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp])
+
+        result = api.ask("do the task")
+
+        assert api.ai_client.messages.create.call_count == 3
+        assert tool.call.call_count == 2
+        assert result.task_done is True
+
+    def test_ask_without_tool_use_does_not_dispatch(self):
+        api = AnthropicApi(system_prompt="test")
+
+        final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""})
+        api.ai_client.messages.create = Mock(return_value=final_resp)
+
+        result = api.ask("where am I?")
+
+        assert api.ai_client.messages.create.call_count == 1
+        assert result.command == "pwd"
+
+    def test_ask_tool_use_messages_are_added_to_history(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        api.ask("do it")
+
+        # Messages: user, assistant(tool_use), user(tool_result), assistant(final json)
+        roles = [m["role"] for m in api.messages]
+        # Assert the exact order, not just counts: a tool_result must follow its tool_use turn.
+        assert roles == ["user", "assistant", "user", "assistant"]
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
 
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
new file mode 100644
index 0000000..2c317f6
--- /dev/null
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -0,0 +1,449 @@
+"""
+Unit tests for MemoryTool — file-backed memory store.
+
+All tests use pytest's tmp_path fixture so they are isolated from the
+user's real ~/.microbots/memory directory.
+"""
+import sys
+import os
+import pytest
+from pathlib import Path
+from unittest.mock import Mock
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src")))
+
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.environment.Environment import CmdReturn
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_tool(tmp_path: Path) -> MemoryTool:
+    """Return a MemoryTool whose memory_dir lives under tmp_path."""
+    return MemoryTool(memory_dir=str(tmp_path / "memory"))
+
+
+# ---------------------------------------------------------------------------
+# Initialisation
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInit:
+
+    def test_memory_dir_is_created_on_init(self, tmp_path):
+        mem_dir = tmp_path / "memory"
+        assert not mem_dir.exists()
+
+        make_tool(tmp_path)
+
+        assert mem_dir.exists()
+        assert mem_dir.is_dir()
+
+    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
+        """When no memory_dir is given it falls back to ~/.microbots/memory."""
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
+        tool = MemoryTool()
+        assert tool._memory_dir == tmp_path / ".microbots" / "memory"
+
+
+# ---------------------------------------------------------------------------
+# is_invoked
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolIsInvoked:
+
+    def test_returns_true_for_memory_commands(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("memory view /memories") is True
+        assert tool.is_invoked("memory create /memories/f.md hello") is True
+
+    def test_returns_false_for_other_commands(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("ls -la") is False
+        assert tool.is_invoked("cat file.txt") is False
+        assert tool.is_invoked("") is False
+
+    def test_strips_leading_whitespace(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("  memory view /memories") is True
+
+
+# ---------------------------------------------------------------------------
+# Path resolution (_resolve)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolResolve:
+
+    def test_resolve_memories_root(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool._resolve("/memories") == tool._memory_dir.resolve()
+
+    def test_resolve_memories_subpath(self, tmp_path):
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("/memories/notes.md")
+        assert resolved == (tool._memory_dir / "notes.md").resolve()
+
+    def test_resolve_rejects_path_traversal(self, tmp_path):
+        tool = make_tool(tmp_path)
+        with pytest.raises(ValueError, match="Path traversal"):
+            tool._resolve("/memories/../../etc/passwd")
+
+    def test_resolve_rejects_non_memory_paths(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for bad in ("/workdir/file", "/home/user/file", "/tmp/file"):
+            with pytest.raises(ValueError):
+                tool._resolve(bad)
+
+
+# ---------------------------------------------------------------------------
+# _view
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolView:
+
+    def test_view_directory_lists_contents(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "notes.md").write_text("hello")
+        (tool._memory_dir / "sub").mkdir()
+
+        result = tool._view(["/memories"])
+
+        assert result.return_code == 0
+        assert "notes.md" in result.stdout
+        assert "sub/" in result.stdout
+
+    def test_view_file_returns_numbered_lines(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n")
+
+        result = tool._view(["/memories/f.md"])
+
+        assert result.return_code == 0
+        assert "1:" in result.stdout
+        assert "line1" in result.stdout
+        assert "3:" in result.stdout
+
+    def test_view_file_with_line_range(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("alpha\nbravo\ncharlie\ndelta\necho\n")
+
+        result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"])
+
+        assert result.return_code == 0
+        assert "bravo" in result.stdout  # first line of the requested range
+        assert "delta" in result.stdout  # last line of the requested range
+        assert "alpha" not in result.stdout  # whole words: a single letter could match unrelated output
+        assert "echo" not in result.stdout  # line 5 lies outside --end 4
+
+    def test_view_nonexistent_path_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._view(["/memories/nonexistent.md"])
+
+        assert result.return_code != 0
+
+    def test_view_no_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._view([])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _create
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolCreate:
+
+    def test_create_writes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._create(["/memories/notes.md", "hello world"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "notes.md").read_text() == "hello world"
+
+    def test_create_overwrites_existing_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("old content")
+
+        result = tool._create(["/memories/f.md", "new content"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "new content"
+
+    def test_create_creates_parent_directories(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._create(["/memories/sub/dir/f.md", "content"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "sub" / "dir" / "f.md").exists()
+
+    def test_create_missing_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._create(["/memories/f.md"])  # missing content
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _str_replace
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolStrReplace:
+
+    def test_str_replace_replaces_unique_text(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_str_replace_fails_when_text_not_found(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"])
+
+        assert result.return_code != 0
+        assert "not found" in result.stderr.lower()
+
+    def test_str_replace_fails_when_text_not_unique(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello hello")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"])
+
+        assert result.return_code != 0
+        assert "2" in result.stderr  # appears N times
+
+    def test_str_replace_missing_flags_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("text")
+        result = tool._str_replace(["/memories/f.md"])
+        assert result.return_code != 0
+
+    def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _insert
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInsert:
+
+    def test_insert_prepends_at_line_zero(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"])
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+        assert lines[1] == "line1"
+
+    def test_insert_at_end_of_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"])
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[-1] == "appended"
+
+    def test_insert_invalid_line_number_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"])
+
+        assert result.return_code != 0
+
+    def test_insert_nonexistent_file_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"])
+        assert result.return_code != 0
+
+    def test_insert_missing_flags_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\n")
+        result = tool._insert(["/memories/f.md"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _delete
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolDelete:
+
+    def test_delete_removes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        f = tool._memory_dir / "f.md"
+        f.write_text("data")
+
+        result = tool._delete(["/memories/f.md"])
+
+        assert result.return_code == 0
+        assert not f.exists()
+
+    def test_delete_removes_directory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        sub = tool._memory_dir / "sub"
+        sub.mkdir()
+        (sub / "f.md").write_text("data")
+
+        result = tool._delete(["/memories/sub"])
+
+        assert result.return_code == 0
+        assert not sub.exists()
+
+    def test_delete_prevents_root_deletion(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for path in ("/memories", "memories", "/memories/"):  # spellings of the memory root
+            result = tool._delete([path])
+            assert result.return_code != 0, f"root deletion was not refused for {path!r}"
+
+    def test_delete_nonexistent_path_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._delete(["/memories/nonexistent.md"])
+        assert result.return_code != 0  # failure is signalled by the return code, not an exception
+
+    def test_delete_no_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._delete([])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _rename
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolRename:
+
+    def test_rename_moves_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        src = tool._memory_dir / "old.md"
+        src.write_text("content")
+
+        result = tool._rename(["/memories/old.md", "/memories/new.md"])
+
+        assert result.return_code == 0
+        assert not src.exists()
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+
+    def test_rename_nonexistent_source_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._rename(["/memories/missing.md", "/memories/new.md"])
+        assert result.return_code != 0
+
+    def test_rename_fails_if_destination_exists(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        result = tool._rename(["/memories/a.md", "/memories/b.md"])
+
+        assert result.return_code != 0
+
+    def test_rename_missing_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._rename(["/memories/a.md"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _clear
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolClear:
+
+    def test_clear_removes_all_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        result = tool._clear()
+
+        assert result.return_code == 0
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_clear_leaves_memory_dir_intact(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        tool._clear()
+
+        assert tool._memory_dir.exists()
+
+
+# ---------------------------------------------------------------------------
+# invoke — full command dispatch
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInvoke:
+
+    def test_invoke_view_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello")
+
+        result = tool.invoke("memory view /memories/f.md", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert "hello" in result.stdout
+
+    def test_invoke_create_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool.invoke('memory create /memories/n.md "some content"', parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "n.md").read_text() == "some content"
+
+    def test_invoke_clear_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        result = tool.invoke("memory clear", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_invoke_unknown_subcommand_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock())
+        assert result.return_code != 0
+        assert "Unknown subcommand" in result.stderr
+
+    def test_invoke_too_few_tokens_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke("memory", parent_bot=Mock())
+        assert result.return_code != 0
+
+    def test_invoke_handles_bad_quoting_gracefully(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke('memory create /memories/f.md "unclosed', parent_bot=Mock())
+        assert result.return_code != 0
+        assert "Parse error" in result.stderr
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

From 0260226a00118678c240951d5e66611b4cb8e0a1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Mar 2026 07:48:13 +0000
Subject: [PATCH 03/10] Initial plan


From ae8e01ef1941a529f3f4a6cc496ed2cf284e1869 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Mar 2026 07:49:29 +0000
Subject: [PATCH 04/10] Add workflow_dispatch trigger to dockerBuildPush
 workflow

Co-authored-by: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com>
---
 .github/workflows/dockerBuildPush.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml
index 08341d0..ae10709 100644
--- a/.github/workflows/dockerBuildPush.yml
+++ b/.github/workflows/dockerBuildPush.yml
@@ -1,6 +1,7 @@
 name: Build and Push to Docker Hub
 
 on:
+  workflow_dispatch:
   push:
     paths:
       - 'src/microbots/environment/local_docker/image_builder/Dockerfile'

From abba9026ae427110e0479f5f329f30cd7b8eea5a Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 08:22:40 +0000
Subject: [PATCH 05/10] Modify comments in AnthropicMemoryTool implementation

---
 .../tools/tool_definitions/anthropic_memory_tool.py         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
index e80750c..9cf547e 100644
--- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -8,8 +8,8 @@
 
 This implementation extends both:
   - ``MemoryTool``:  provides all file-operation logic (_resolve, _view,
-    _create, _str_replace, _insert, _delete, _rename, _clear) and the
-    ToolAbstract duck-typing interface.
+    _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies
+    the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.).
   - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and
     the ``to_dict()`` / ``call()`` interface required by AnthropicApi.
 
@@ -108,7 +108,7 @@ def __init__(
         _SDKMemoryTool.__init__(self)  # type: ignore[call-arg]
 
     # ---------------------------------------------------------------------- #
-    # ToolAbstract duck-typing overrides
+    # ToolAbstract overrides
     # ---------------------------------------------------------------------- #
 
     def is_model_supported(self, model_name: str) -> bool:

From 213aa9e70bd249df48a74e1e956930ea14a76656 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 09:01:00 +0000
Subject: [PATCH 06/10] Add unit tests for MemoryTool and AnthropicMemoryTool
 functionality

---
 test/bot/test_upgrade_tools_for_provider.py   | 161 ++++++++
 .../test_anthropic_memory_tool.py             | 347 ++++++++++++++++++
 .../tool_definitions/test_memory_tool.py      | 104 ++++++
 3 files changed, 612 insertions(+)
 create mode 100644 test/bot/test_upgrade_tools_for_provider.py
 create mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py

diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py
new file mode 100644
index 0000000..f5d1e98
--- /dev/null
+++ b/test/bot/test_upgrade_tools_for_provider.py
@@ -0,0 +1,161 @@
+"""
+Unit tests for MicroBot._upgrade_tools_for_provider.
+
+These tests verify that plain ``MemoryTool`` instances are automatically
+replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic,
+and that no changes are made for other providers or other tool types.
+
+All tests bypass the heavy MicroBot constructor (Docker environment, LLM
+creation) by constructing an uninitialized instance with ``object.__new__``
+and manually setting only the attributes the method under test needs.
+"""
+import sys
+import os
+import logging
+import pytest
+from unittest.mock import patch, Mock
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
+
+from microbots.MicroBot import MicroBot
+from microbots.constants import ModelProvider
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _bare_microbot(model_provider: str, tools: list) -> MicroBot:
+    """Return an uninitialized MicroBot with only the attributes that
+    ``_upgrade_tools_for_provider`` inspects."""
+    bot = object.__new__(MicroBot)
+    bot.model_provider = model_provider
+    bot.additional_tools = list(tools)
+    return bot
+
+
+def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool:
+    return MemoryTool(
+        memory_dir=str(tmp_path / "memory"),
+        usage_instructions_to_llm=instructions,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestUpgradeToolsForProvider:
+
+    # -- Anthropic provider: MemoryTool → AnthropicMemoryTool ---------------
+
+    def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+
+    def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path):
+        mem_dir = str(tmp_path / "my_memory")
+        tool = MemoryTool(memory_dir=mem_dir)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        upgraded = bot.additional_tools[0]
+        assert isinstance(upgraded, AnthropicMemoryTool)
+        assert str(upgraded.memory_dir) == mem_dir
+
+    def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path):
+        custom_instructions = "custom memory instructions for test"
+        tool = _memory_tool(tmp_path, instructions=custom_instructions)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        upgraded = bot.additional_tools[0]
+        assert upgraded.usage_instructions_to_llm == custom_instructions
+
+    def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path):
+        existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert bot.additional_tools[0] is existing
+
+    def test_non_memory_tools_are_kept_unchanged(self, tmp_path):
+        other_tool = Mock()
+        other_tool.__class__ = Mock  # not a MemoryTool subclass
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert bot.additional_tools[0] is other_tool
+
+    def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path):
+        plain_memory = _memory_tool(tmp_path)
+        already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2"))
+        other_tool = Mock(spec=[])
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 3
+        # first: should have been upgraded
+        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+        assert bot.additional_tools[0] is not plain_memory
+        # second: already AnthropicMemoryTool, untouched
+        assert bot.additional_tools[1] is already_upgraded
+        # third: non-memory tool, untouched
+        assert bot.additional_tools[2] is other_tool
+
+    def test_empty_tool_list_is_a_no_op(self):
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [])
+
+        bot._upgrade_tools_for_provider()
+
+        assert bot.additional_tools == []
+
+    def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog):
+        tool1 = _memory_tool(tmp_path)
+        tmp_path2 = tmp_path / "sub"
+        tmp_path2.mkdir()
+        tool2 = _memory_tool(tmp_path2)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2])
+
+        with caplog.at_level(logging.INFO, logger=" MicroBot "):
+            bot._upgrade_tools_for_provider()
+
+        upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message]
+        assert len(upgrade_logs) == 2
+
+    # -- Non-Anthropic providers: no upgrade should happen ------------------
+
+    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
+    def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(provider, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert isinstance(bot.additional_tools[0], MemoryTool)
+        assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+
+    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
+    def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(provider, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert bot.additional_tools[0] is tool
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
new file mode 100644
index 0000000..6668bfc
--- /dev/null
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -0,0 +1,347 @@
+"""
+Unit tests for AnthropicMemoryTool.
+
+Covers:
+  - __init__: memory_dir / usage_instructions forwarding and defaults
+  - is_model_supported
+  - is_invoked
+  - clear_all / clear_all_memory (SDK override)
+  - SDK overrides: view, create, str_replace, insert, delete, rename
+    (happy-path + RuntimeError on failure)
+"""
+import logging
+import pytest
+
+from anthropic.types.beta import (
+    BetaMemoryTool20250818CreateCommand,
+    BetaMemoryTool20250818DeleteCommand,
+    BetaMemoryTool20250818InsertCommand,
+    BetaMemoryTool20250818RenameCommand,
+    BetaMemoryTool20250818StrReplaceCommand,
+    BetaMemoryTool20250818ViewCommand,
+)
+
+from microbots.tools.tool_definitions.anthropic_memory_tool import (
+    DEFAULT_MEMORY_INSTRUCTIONS,
+    AnthropicMemoryTool,
+)
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_tool(tmp_path) -> AnthropicMemoryTool:
+    return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInit:
+
+    def test_is_subclass_of_memory_tool(self, tmp_path):
+        assert isinstance(make_tool(tmp_path), MemoryTool)
+
+    def test_memory_dir_is_forwarded(self, tmp_path):
+        mem_dir = str(tmp_path / "my_memory")
+        tool = AnthropicMemoryTool(memory_dir=mem_dir)
+        assert str(tool._memory_dir) == mem_dir
+
+    def test_memory_dir_is_created_on_init(self, tmp_path):
+        mem_dir = tmp_path / "new_memory"
+        assert not mem_dir.exists()
+        AnthropicMemoryTool(memory_dir=str(mem_dir))
+        assert mem_dir.exists()
+
+    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
+        from pathlib import Path
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
+        tool = AnthropicMemoryTool()
+        assert tool._memory_dir == tmp_path / ".microbots" / "memory"
+
+    def test_custom_usage_instructions_are_stored(self, tmp_path):
+        custom = "custom instructions"
+        tool = AnthropicMemoryTool(
+            memory_dir=str(tmp_path / "memory"),
+            usage_instructions=custom,
+        )
+        assert tool.usage_instructions_to_llm == custom
+
+    def test_default_usage_instructions_are_applied_when_none(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS
+
+
+# ---------------------------------------------------------------------------
+# is_model_supported
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolIsModelSupported:
+
+    def test_returns_true_for_claude_models(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"):
+            assert tool.is_model_supported(model) is True
+
+    def test_returns_false_for_non_claude_models(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""):
+            assert tool.is_model_supported(model) is False
+
+
+# ---------------------------------------------------------------------------
+# is_invoked
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolIsInvoked:
+
+    def test_always_returns_false(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for cmd in ("memory view /memories", "memory clear", "anything", ""):
+            assert tool.is_invoked(cmd) is False
+
+
+# ---------------------------------------------------------------------------
+# clear_all / clear_all_memory
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolClearAll:
+
+    def test_clear_all_removes_all_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        tool.clear_all()
+
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_clear_all_leaves_memory_dir_intact(self, tmp_path):
+        tool = make_tool(tmp_path)
+        tool.clear_all()
+        assert tool._memory_dir.exists()
+
+    def test_clear_all_logs_info(self, tmp_path, caplog):
+        tool = make_tool(tmp_path)
+        with caplog.at_level(logging.INFO):
+            tool.clear_all()
+        assert "AnthropicMemoryTool" in caplog.text
+        assert "cleared" in caplog.text
+
+    def test_clear_all_memory_returns_string(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.clear_all_memory()
+        assert result == "All memory cleared"
+
+    def test_clear_all_memory_removes_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        tool.clear_all_memory()
+
+        assert list(tool._memory_dir.iterdir()) == []
+
+
+# ---------------------------------------------------------------------------
+# view (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolView:
+
+    def test_view_returns_file_contents(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "notes.md").write_text("hello\nworld\n")
+
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/notes.md", view_range=None
+        )
+        result = tool.view(cmd)
+
+        assert "hello" in result
+        assert "world" in result
+
+    def test_view_with_view_range(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("AAA\nBBB\nCCC\nDDD\nEEE\n")
+
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/f.md", view_range=[2, 4]
+        )
+        result = tool.view(cmd)
+
+        assert "BBB" in result  # first line of the requested range
+        assert "DDD" in result  # last line of the requested range
+        assert "AAA" not in result  # line 1 precedes the range
+        assert "EEE" not in result  # line 5 follows the range
+
+    def test_view_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/nonexistent.md", view_range=None
+        )
+        with pytest.raises(RuntimeError):
+            tool.view(cmd)
+
+
+# ---------------------------------------------------------------------------
+# create (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolCreate:
+
+    def test_create_writes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818CreateCommand(
+            command="create", path="/memories/new.md", file_text="hello world"
+        )
+        result = tool.create(cmd)
+
+        assert "new.md" in result
+        assert (tool._memory_dir / "new.md").read_text() == "hello world"
+
+    def test_create_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        # Path traversal should make _create fail via _resolve; ValueError or RuntimeError both pass
+        cmd = BetaMemoryTool20250818CreateCommand(
+            command="create", path="/memories/../../etc/evil.md", file_text="x"
+        )
+        with pytest.raises((RuntimeError, ValueError)):
+            tool.create(cmd)
+
+
+# ---------------------------------------------------------------------------
+# str_replace (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolStrReplace:
+
+    def test_str_replace_edits_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        cmd = BetaMemoryTool20250818StrReplaceCommand(
+            command="str_replace",
+            path="/memories/f.md",
+            old_str="hello",
+            new_str="goodbye",
+        )
+        result = tool.str_replace(cmd)
+
+        assert "f.md" in result
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_str_replace_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+        cmd = BetaMemoryTool20250818StrReplaceCommand(
+            command="str_replace",
+            path="/memories/f.md",
+            old_str="not present",
+            new_str="x",
+        )
+        with pytest.raises(RuntimeError):
+            tool.str_replace(cmd)
+
+
+# ---------------------------------------------------------------------------
+# insert (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInsert:
+
+    def test_insert_prepends_line(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        cmd = BetaMemoryTool20250818InsertCommand(
+            command="insert",
+            path="/memories/f.md",
+            insert_line=0,  # 0 places the text before the current first line (asserted below)
+            insert_text="prepended",
+        )
+        result = tool.insert(cmd)
+
+        assert "0" in result or "prepended" in result or "f.md" in result  # NOTE(review): loose check - any one substring passes, message text is not pinned
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
+    def test_insert_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818InsertCommand(
+            command="insert",
+            path="/memories/missing.md",
+            insert_line=0,
+            insert_text="x",
+        )
+        with pytest.raises(RuntimeError):
+            tool.insert(cmd)
+
+
+# ---------------------------------------------------------------------------
+# delete (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolDelete:
+
+    def test_delete_removes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        cmd = BetaMemoryTool20250818DeleteCommand(
+            command="delete", path="/memories/f.md"
+        )
+        tool.delete(cmd)
+
+        assert not (tool._memory_dir / "f.md").exists()
+
+    def test_delete_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818DeleteCommand(
+            command="delete", path="/memories/nonexistent.md"
+        )
+        with pytest.raises(RuntimeError):
+            tool.delete(cmd)
+
+
+# ---------------------------------------------------------------------------
+# rename (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolRename:
+
+    def test_rename_moves_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "old.md").write_text("content")
+
+        cmd = BetaMemoryTool20250818RenameCommand(
+            command="rename",
+            old_path="/memories/old.md",
+            new_path="/memories/new.md",
+        )
+        tool.rename(cmd)
+
+        assert not (tool._memory_dir / "old.md").exists()
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+
+    def test_rename_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818RenameCommand(
+            command="rename",
+            old_path="/memories/missing.md",
+            new_path="/memories/new.md",
+        )
+        with pytest.raises(RuntimeError):
+            tool.rename(cmd)
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 2c317f6..5679d5e 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -98,6 +98,20 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path):
             with pytest.raises(ValueError):
                 tool._resolve(bad)
 
+    def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path):
+        """The else branch: a path without a /memories/ prefix is resolved
+        relative to memory_dir."""
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("notes.md")
+        assert resolved == (tool._memory_dir / "notes.md").resolve()
+
+    def test_resolve_bare_relative_subdir_path(self, tmp_path):
+        """A bare relative path with subdirectory components is also resolved
+        relative to memory_dir (else branch)."""
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("sub/dir/file.md")
+        assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve()
+
 
 # ---------------------------------------------------------------------------
 # _view
@@ -231,6 +245,13 @@ def test_str_replace_missing_flags_returns_error(self, tmp_path):
         result = tool._str_replace(["/memories/f.md"])
         assert result.return_code != 0
 
+    def test_str_replace_empty_args_returns_usage_error(self, tmp_path):
+        """if not args branch: calling _str_replace([]) returns the usage message."""
+        tool = make_tool(tmp_path)
+        result = tool._str_replace([])
+        assert result.return_code == 1
+        assert "Usage: memory str_replace" in result.stderr
+
     def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
@@ -284,6 +305,13 @@ def test_insert_missing_flags_returns_error(self, tmp_path):
         result = tool._insert(["/memories/f.md"])
         assert result.return_code != 0
 
+    def test_insert_empty_args_returns_usage_error(self, tmp_path):
+        """if not args branch: calling _insert([]) returns the usage message."""
+        tool = make_tool(tmp_path)
+        result = tool._insert([])
+        assert result.return_code == 1
+        assert "Usage: memory insert" in result.stderr
+
 
 # ---------------------------------------------------------------------------
 # _delete
@@ -394,6 +422,19 @@ def test_clear_leaves_memory_dir_intact(self, tmp_path):
         assert tool._memory_dir.exists()
 
 
+# ---------------------------------------------------------------------------
+# is_model_supported
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolIsModelSupported:
+
+    def test_returns_true_for_any_model(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("gpt-4", "claude-3-sonnet", "ollama/llama3", ""):
+            assert tool.is_model_supported(model) is True
+
+
 # ---------------------------------------------------------------------------
 # invoke — full command dispatch
 # ---------------------------------------------------------------------------
@@ -444,6 +485,69 @@ def test_invoke_handles_bad_quoting_gracefully(self, tmp_path):
         assert result.return_code != 0
         assert "Parse error" in result.stderr
 
+    def test_invoke_str_replace_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool.invoke(
+            'memory str_replace /memories/f.md --old "hello" --new "goodbye"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_invoke_insert_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool.invoke(
+            'memory insert /memories/f.md --line 0 --text "prepended"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
+    def test_invoke_delete_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        f = tool._memory_dir / "f.md"
+        f.write_text("data")
+
+        result = tool.invoke("memory delete /memories/f.md", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert not f.exists()
+
+    def test_invoke_rename_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "old.md").write_text("content")
+
+        result = tool.invoke(
+            "memory rename /memories/old.md /memories/new.md",
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+        assert not (tool._memory_dir / "old.md").exists()
+
+    def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path):
+        """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand
+        are caught and returned as a CmdReturn with return_code=1."""
+        tool = make_tool(tmp_path)
+
+        # str_replace on a non-existent file raises FileNotFoundError
+        result = tool.invoke(
+            'memory str_replace /memories/missing.md --old "x" --new "y"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 1
+        assert result.stdout == ""
+        assert result.stderr != ""
+
 
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])

From b4d753e938ac4861970427068253df19009f8969 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 09:23:30 +0000
Subject: [PATCH 07/10] Add tests for coverage

---
 .../test_anthropic_memory_tool.py             | 15 +++++--
 .../tool_definitions/test_memory_tool.py      | 40 ++++++++++++++++---
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index 6668bfc..db7069e 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -209,13 +209,20 @@ def test_create_writes_file(self, tmp_path):
         assert (tool._memory_dir / "new.md").read_text() == "hello world"
 
     def test_create_raises_runtime_error_on_failure(self, tmp_path):
+        """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by
+        mocking _create to return a non-zero CmdReturn."""
+        from unittest.mock import patch
+        from microbots.environment.Environment import CmdReturn
+
         tool = make_tool(tmp_path)
-        # Path traversal should cause _create to fail via _resolve
         cmd = BetaMemoryTool20250818CreateCommand(
-            command="create", path="/memories/../../etc/evil.md", file_text="x"
+            command="create", path="/memories/new.md", file_text="x"
         )
-        with pytest.raises((RuntimeError, ValueError)):
-            tool.create(cmd)
+        with patch.object(
+            tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1)
+        ):
+            with pytest.raises(RuntimeError, match="disk full"):
+                tool.create(cmd)
 
 
 # ---------------------------------------------------------------------------
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 5679d5e..089ca52 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -166,6 +166,14 @@ def test_view_no_args_returns_error(self, tmp_path):
         result = tool._view([])
         assert result.return_code != 0
 
+    def test_view_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello\n")
+        result = tool._view(["/memories/f.md", "--bogus", "value"])
+        assert result.return_code == 0
+        assert "hello" in result.stdout
+
 
 # ---------------------------------------------------------------------------
 # _create
@@ -257,6 +265,16 @@ def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
         result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
         assert result.return_code != 0
 
+    def test_str_replace_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+        result = tool._str_replace([
+            "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye"
+        ])
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
 
 # ---------------------------------------------------------------------------
 # _insert
@@ -312,6 +330,17 @@ def test_insert_empty_args_returns_usage_error(self, tmp_path):
         assert result.return_code == 1
         assert "Usage: memory insert" in result.stderr
 
+    def test_insert_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+        result = tool._insert([
+            "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended"
+        ])
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
 
 # ---------------------------------------------------------------------------
 # _delete
@@ -534,19 +563,20 @@ def test_invoke_rename_subcommand(self, tmp_path):
         assert not (tool._memory_dir / "old.md").exists()
 
     def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path):
-        """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand
-        are caught and returned as a CmdReturn with return_code=1."""
+        """except (ValueError, FileNotFoundError, RuntimeError) block:
+        a path-traversal path causes _resolve() to raise ValueError inside a
+        subcommand handler, which is caught and returned as CmdReturn(return_code=1)."""
         tool = make_tool(tmp_path)
 
-        # str_replace on a non-existent file raises FileNotFoundError
+        # Path traversal triggers ValueError inside _view → caught by except block
         result = tool.invoke(
-            'memory str_replace /memories/missing.md --old "x" --new "y"',
+            "memory view /memories/../../etc/passwd",
             parent_bot=Mock(),
         )
 
         assert result.return_code == 1
         assert result.stdout == ""
-        assert result.stderr != ""
+        assert "traversal" in result.stderr.lower() or result.stderr != ""
 
 
 if __name__ == "__main__":

From 579e6a349355077676ea59211fe7ffc2f7ad6dcd Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 10 Mar 2026 11:54:18 +0000
Subject: [PATCH 08/10] move tool dispatch out of AnthropicApi into MicroBot
 run loop

---
 pytest.ini                                    |   1 +
 src/microbots/MicroBot.py                     |  39 +-
 src/microbots/llm/anthropic_api.py            | 117 +++--
 src/microbots/llm/llm.py                      |   9 +
 .../tool_definitions/anthropic_memory_tool.py |  37 +-
 test/bot/test_memory_tool_integration.py      | 439 ++++++++++++++++++
 test/bot/test_upgrade_tools_for_provider.py   | 110 ++---
 test/llm/test_anthropic_api.py                | 289 +++++-------
 .../test_anthropic_memory_tool.py             |  14 +-
 9 files changed, 710 insertions(+), 345 deletions(-)
 create mode 100644 test/bot/test_memory_tool_integration.py

diff --git a/pytest.ini b/pytest.ini
index d0e4f1f..80758ec 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -11,5 +11,6 @@ addopts =
 markers =
     unit: Unit tests
     integration: Integration tests
+    anthropic_integration: Integration tests requiring a real Anthropic API key
     slow: Slow tests
     docker: marks tests that require a running Docker daemon and pull container images
diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py
index 7b061d7..f2708b1 100644
--- a/src/microbots/MicroBot.py
+++ b/src/microbots/MicroBot.py
@@ -317,38 +317,7 @@ def _create_environment(self, folder_to_mount: Optional[Mount]):
             folder_to_mount=folder_to_mount,
         )
 
-    def _upgrade_tools_for_provider(self):
-        """Auto-upgrade provider-agnostic tools to their provider-optimised variants.
-
-        Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when
-        the provider is Anthropic so the model gets native structured tool-use
-        instead of the text-command loop.  The ``memory_dir`` and any custom
-        ``usage_instructions_to_llm`` are forwarded to the upgraded instance.
-        """
-        if self.model_provider != ModelProvider.ANTHROPIC:
-            return
-
-        # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths
-        from microbots.tools.tool_definitions.memory_tool import MemoryTool
-        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
-
-        upgraded = []
-        for tool in self.additional_tools:
-            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
-                logger.info(
-                    "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider"
-                )
-                upgraded.append(AnthropicMemoryTool(
-                    memory_dir=tool.memory_dir,
-                    usage_instructions=tool.usage_instructions_to_llm,
-                ))
-            else:
-                upgraded.append(tool)
-        self.additional_tools = upgraded
-
     def _create_llm(self):
-        self._upgrade_tools_for_provider()
-
         # Append tool usage instructions to system prompt
         system_prompt_with_tools = self.system_prompt if self.system_prompt else ""
         if self.additional_tools:
@@ -365,16 +334,10 @@ def _create_llm(self):
                 system_prompt=system_prompt_with_tools, model_name=self.deployment_name
             )
         elif self.model_provider == ModelProvider.ANTHROPIC:
-            # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing:
-            # any tool that exposes both to_dict() and call() is a native Anthropic tool.
-            native_tools = [
-                t for t in self.additional_tools
-                if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None))
-            ]
             self.llm = AnthropicApi(
                 system_prompt=system_prompt_with_tools,
                 deployment_name=self.deployment_name,
-                native_tools=native_tools or None,
+                additional_tools=self.additional_tools,
             )
         # No Else case required as model provider is already validated using _validate_model_and_provider
 
diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index 4a73ba5..a403f9c 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -21,12 +21,31 @@
 
 class AnthropicApi(LLMInterface):
 
+    def upgrade_tools(self, tools: list) -> list:
+        """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use."""
+        from microbots.tools.tool_definitions.memory_tool import MemoryTool
+        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+        upgraded = []
+        for tool in tools:
+            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
+                logger.info(
+                    "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider"
+                )
+                upgraded.append(AnthropicMemoryTool(
+                    memory_dir=tool.memory_dir,
+                    usage_instructions=tool.usage_instructions_to_llm,
+                ))
+            else:
+                upgraded.append(tool)
+        return upgraded
+
     def __init__(
         self,
         system_prompt: str,
         deployment_name: str = deployment_name,
         max_retries: int = 3,
-        native_tools: Optional[List] = None,
+        additional_tools: Optional[List] = None,
     ):
         """
         Parameters
@@ -37,11 +56,10 @@ def __init__(
             The Anthropic model deployment name.
         max_retries : int
             Maximum number of retries for invalid LLM responses.
-        native_tools : Optional[List]
-            Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that
-            have both a ``to_dict()`` and a ``call()`` method.  These are passed
-            directly to the API and their tool-use blocks are dispatched here
-            before the JSON response is returned to the caller.
+        additional_tools : Optional[List]
+            Tool objects passed from MicroBot.  Provider-agnostic tools (e.g.
+            ``MemoryTool``) are replaced, in this same list, by Anthropic-native
+            variants; ``to_dict()`` schemas of tools that expose one are cached.
         """
         self.ai_client = Anthropic(
             api_key=api_key,
@@ -50,12 +68,18 @@ def __init__(
         self.deployment_name = deployment_name
         self.system_prompt = system_prompt
         self.messages = []
-        self.native_tools = native_tools or []
-        # Cache tool dicts once so _call_api and _dispatch_tool_use don't
-        # re-serialise on every invocation (important when multiple native
-        # tools are registered, e.g. memory + bash).
-        self._native_tool_dicts = [t.to_dict() for t in self.native_tools]
-        self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)}
+
+        # Silently upgrade tools in-place and extract API schemas
+        tools = additional_tools or []
+        upgraded = self.upgrade_tools(tools)
+        # Mutate the original list so the caller (MicroBot) sees upgraded tools
+        if additional_tools is not None:
+            additional_tools[:] = upgraded
+        self._tool_dicts = [
+            t.to_dict() for t in upgraded
+            if callable(getattr(t, "to_dict", None))
+        ]
+        self._pending_tool_response = None
 
         # Set these values here. This logic will be handled in the parent class.
         self.max_retries = max_retries
@@ -66,7 +90,7 @@ def __init__(
     # ---------------------------------------------------------------------- #
 
     def _call_api(self) -> object:
-        """Call the Anthropic messages API, including native tools when present."""
+        """Call the Anthropic messages API, including tool definitions when present."""
         kwargs = dict(
             model=self.deployment_name,
             system=self.system_prompt,
@@ -74,44 +98,24 @@ def _call_api(self) -> object:
             max_tokens=4096,
         )
 
-        if self.native_tools:
-            kwargs["tools"] = self._native_tool_dicts
+        if self._tool_dicts:
+            kwargs["tools"] = self._tool_dicts
 
         return self.ai_client.messages.create(**kwargs)
 
-    def _dispatch_tool_use(self, response) -> None:
-        """Handle a tool_use response: execute each tool call and append results.
+    def _append_tool_result(self, response, result_text: str) -> None:
+        """Append the assistant tool_use turn and the corresponding tool_result user turn.
 
-        Mutates ``self.messages`` in place — appends the assistant turn (with
-        all content blocks) and the corresponding tool_result user turn.
+        Called when the caller provides the tool execution result via
+        the next ``ask()`` call.
         """
-        # Append the full assistant message as-is (content is a list of blocks)
         assistant_content = [block.model_dump() for block in response.content]
         self.messages.append({"role": "assistant", "content": assistant_content})
 
-        # Build tool_result entries for every tool_use block
         tool_results = []
         for block in response.content:
             if block.type != "tool_use":
                 continue
-
-            # Find the matching native tool by name
-            tool = self._native_tools_by_name.get(block.name)
-            if tool is None:
-                result_text = f"Error: unknown tool '{block.name}'"
-                logger.error("Received tool_use for unknown tool: %s", block.name)
-            else:
-                try:
-                    result_text = tool.call(block.input)
-                    logger.info(
-                        "🧠 Native tool '%s' executed. Result (first 200 chars): %s",
-                        block.name,
-                        str(result_text)[:200],
-                    )
-                except Exception as exc:
-                    result_text = f"Error executing tool '{block.name}': {exc}"
-                    logger.error("Native tool '%s' raised: %s", block.name, exc)
-
             tool_results.append({
                 "type": "tool_result",
                 "tool_use_id": block.id,
@@ -127,18 +131,39 @@ def _dispatch_tool_use(self, response) -> None:
     def ask(self, message: str) -> LLMAskResponse:
         self.retries = 0  # reset retries for each ask. Handled in parent class.
 
-        self.messages.append({"role": "user", "content": message})
+        if self._pending_tool_response:
+            # Previous response was tool_use — format this message as tool results.
+            self._append_tool_result(self._pending_tool_response, message)
+            self._pending_tool_response = None
+        else:
+            self.messages.append({"role": "user", "content": message})
 
         valid = False
         while not valid:
             response = self._call_api()
 
-            # Dispatch any tool_use rounds before looking for a JSON response.
-            # The model may call the memory tool multiple times before producing
-            # its final JSON command.
-            while response.stop_reason == "tool_use":
-                self._dispatch_tool_use(response)
-                response = self._call_api()
+            if response.stop_reason == "tool_use":
+                # Return tool call info as an LLMAskResponse so the
+                # caller (MicroBot.run) can dispatch the tool.
+                self._pending_tool_response = response
+
+                thoughts = ""
+                for block in response.content:
+                    if block.type == "text":
+                        thoughts = block.text
+                        break
+
+                tool_calls = []
+                for block in response.content:
+                    if block.type == "tool_use":
+                        tool_calls.append({
+                            "name": block.name,
+                            "id": block.id,
+                            "input": block.input,
+                        })
+
+                command = json.dumps({"native_tool_calls": tool_calls})
+                return LLMAskResponse(task_done=False, thoughts=thoughts, command=command)
 
             # Extract text content from the final response
             response_text = ""
diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py
index 2800790..e7e5c22 100644
--- a/src/microbots/llm/llm.py
+++ b/src/microbots/llm/llm.py
@@ -29,6 +29,15 @@ def ask(self, message: str) -> LLMAskResponse:
     def clear_history(self) -> bool:
         pass
 
+    def upgrade_tools(self, tools: list) -> list:
+        """Upgrade tools for the specific LLM provider.
+
+        The default implementation is a no-op.  Subclasses (e.g.
+        ``AnthropicApi``) override this to swap provider-agnostic tools
+        with their native equivalents.
+        """
+        return tools
+
     def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
 
         if self.retries >= self.max_retries:
diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
index 9cf547e..ef26182 100644
--- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -21,8 +21,9 @@
 The memory tool (type ``memory_20250818``) is available in the standard
 Anthropic library and does not require a beta endpoint or header.  Pass it
 via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular
-``client.messages.create(...)`` call.  ``AnthropicApi`` handles this
-automatically when ``native_tools`` contains an ``AnthropicMemoryTool``.
+``client.messages.create(...)`` call.  ``AnthropicApi`` auto-upgrades any
+``MemoryTool`` it is given to ``AnthropicMemoryTool`` and sends each
+tool's ``to_dict()`` schema through the ``tools`` request argument.
 
 Usage:
     from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
@@ -33,6 +34,7 @@
 
 from __future__ import annotations
 
+import json
 from logging import getLogger
 from pathlib import Path
 
@@ -48,6 +50,7 @@
     BetaMemoryTool20250818ViewCommand,
 )
 
+from microbots.environment.Environment import CmdReturn
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 
 logger = getLogger(__name__)
@@ -116,10 +119,36 @@ def is_model_supported(self, model_name: str) -> bool:
         return "claude" in model_name.lower()
 
     def is_invoked(self, command: str) -> bool:
-        """Return False — this tool is dispatched natively by AnthropicApi,
-        not via the shell command loop."""
+        """Return True when the command is a serialized native_tool_calls JSON
+        containing a call to the ``memory`` tool."""
+        try:
+            data = json.loads(command)
+            if "native_tool_calls" in data:
+                return any(tc["name"] == "memory" for tc in data["native_tool_calls"])
+        except (json.JSONDecodeError, KeyError, TypeError):
+            pass
         return False
 
+    def invoke(self, command: str, parent_bot) -> CmdReturn:
+        """Execute all memory tool calls in the serialized native_tool_calls batch."""
+        data = json.loads(command)
+        results = []
+        for tc in data["native_tool_calls"]:
+            if tc["name"] != "memory":
+                continue
+            try:
+                result = self.call(tc["input"])
+                logger.info(
+                    "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s",
+                    str(result)[:200],
+                )
+                results.append(str(result))
+            except Exception as exc:
+                logger.error("Native tool 'memory' raised: %s", exc)
+                results.append(f"Error executing tool 'memory': {exc}")
+        combined = "\n".join(results)
+        return CmdReturn(stdout=combined, stderr="", return_code=0)
+
     def clear_all(self) -> None:
         """Delete all memory files (useful for testing or resetting state)."""
         self._clear()
diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
new file mode 100644
index 0000000..25c717d
--- /dev/null
+++ b/test/bot/test_memory_tool_integration.py
@@ -0,0 +1,439 @@
+"""Tests for the Anthropic memory tool end-to-end flow.
+
+Unit tests (mocked API):
+  Verify wiring — auto-upgrade, tool dispatch, and memory file operations
+  with a mocked Anthropic client. Fast, free, no API key needed.
+
+Integration tests (real API):
+  Hit the actual Anthropic API to verify the full round-trip.
+  Gated behind ``@pytest.mark.anthropic_integration``.
+  Require ``ANTHROPIC_API_KEY`` in .env.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+from dotenv import load_dotenv
+
+load_dotenv()
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))
+)
+
+from microbots import MicroBot, BotRunResult
+from microbots.llm.llm import llm_output_format_str
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""):
+    """Build a mock Anthropic API response with stop_reason='tool_use'."""
+    blocks = []
+
+    if thinking_text:
+        text_block = Mock()
+        text_block.type = "text"
+        text_block.text = thinking_text
+        blocks.append(text_block)
+
+    tool_block = Mock()
+    tool_block.type = "tool_use"
+    tool_block.name = tool_name
+    tool_block.id = tool_id
+    tool_block.input = tool_input
+    tool_block.model_dump = Mock(return_value={
+        "type": "tool_use",
+        "id": tool_id,
+        "name": tool_name,
+        "input": tool_input,
+    })
+    blocks.append(tool_block)
+
+    resp = Mock()
+    resp.stop_reason = "tool_use"
+    resp.content = blocks
+    return resp
+
+
+def _make_end_turn_response(task_done, thoughts, command=""):
+    """Build a mock Anthropic API response with stop_reason='end_turn'."""
+    payload = json.dumps({
+        "task_done": task_done,
+        "thoughts": thoughts,
+        "command": command,
+    })
+
+    text_block = Mock()
+    text_block.type = "text"
+    text_block.text = payload
+
+    resp = Mock()
+    resp.stop_reason = "end_turn"
+    resp.content = [text_block]
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolWiring:
+    """Unit tests — mocked Anthropic client, real tool dispatch and file ops."""
+
+    @pytest.fixture()
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+
+    @pytest.fixture()
+    def bot(self, memory_dir):
+        """Create a MicroBot with Anthropic provider and a MemoryTool.
+
+        The Anthropic client is mocked, but the rest of the stack is real:
+        auto-upgrade, tool dispatch, and memory file operations.
+        """
+        tool = MemoryTool(
+            memory_dir=str(memory_dir),
+            usage_instructions_to_llm="Use the memory tool to persist notes.",
+        )
+
+        mock_env = Mock()
+        mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="")
+
+        anthropic_deployment = "claude-sonnet-4-5"
+
+        with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \
+             patch("microbots.llm.anthropic_api.api_key", "test-key"), \
+             patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \
+             patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment):
+
+            bot = MicroBot(
+                model=f"anthropic/{anthropic_deployment}",
+                system_prompt="You are a helpful assistant.",
+                additional_tools=[tool],
+                environment=mock_env,
+            )
+
+            self._mock_client = mock_anthropic_cls.return_value
+            yield bot
+            del bot
+
+    # -- Upgrade verification -----------------------------------------------
+
+    def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot):
+        """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool."""
+        upgraded_tools = bot.additional_tools
+        memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+
+    def test_tool_dicts_include_memory_schema(self, bot):
+        """The LLM should have received the memory tool schema."""
+        assert len(bot.llm._tool_dicts) == 1
+        assert bot.llm._tool_dicts[0]["type"] == "memory_20250818"
+
+    # -- Create file via tool_use -------------------------------------------
+
+    def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory create → MicroBot dispatches → file appears on disk."""
+        # Sequence:
+        # 1. ask(task) → API returns tool_use (memory create)
+        # 2. ask(tool_result) → API returns end_turn (task_done=True)
+        self._mock_client.messages.create.side_effect = [
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_001",
+                tool_input={
+                    "command": "create",
+                    "path": "/memories/notes.md",
+                    "file_text": "Hello from integration test",
+                },
+                thinking_text="I'll save a note to memory.",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Saved a note to memory successfully.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Save a note saying 'Hello from integration test'",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        assert result.error is None
+
+        # Verify the file was actually created on disk
+        # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md
+        created_file = memory_dir / "notes.md"
+        assert created_file.exists(), f"Expected {created_file} to be created"
+        assert created_file.read_text() == "Hello from integration test"
+
+    # -- View file via tool_use ---------------------------------------------
+
+    def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory view → MicroBot dispatches → file content returned."""
+        # Pre-create a file in memory
+        # _resolve("/memories/existing.md") → memory_dir/existing.md
+        (memory_dir / "existing.md").write_text("Previously saved content")
+
+        self._mock_client.messages.create.side_effect = [
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_002",
+                tool_input={
+                    "command": "view",
+                    "path": "/memories/existing.md",
+                },
+                thinking_text="Let me check my memory.",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Found previously saved content in memory.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Check your memory for existing notes",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+
+        # Verify the view result was passed back to the API as tool_result
+        calls = self._mock_client.messages.create.call_args_list
+        assert len(calls) == 2
+        # The second call should have messages including the tool_result
+        second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", [])
+        tool_result_msgs = [
+            m for m in second_call_messages
+            if m.get("role") == "user" and isinstance(m.get("content"), list)
+            and any(c.get("type") == "tool_result" for c in m["content"])
+        ]
+        assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call"
+        # The tool_result content should contain the file content
+        tool_result_content = tool_result_msgs[-1]["content"][0]["content"]
+        assert "Previously saved content" in tool_result_content
+
+    # -- Multiple tool calls in sequence ------------------------------------
+
+    def test_create_then_view_memory_file(self, bot, memory_dir):
+        """LLM creates a file, then views it — both dispatched via MicroBot loop."""
+        self._mock_client.messages.create.side_effect = [
+            # Step 1: create file
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_003",
+                tool_input={
+                    "command": "create",
+                    "path": "/memories/todo.md",
+                    "file_text": "- Fix bug #42\n- Write tests",
+                },
+                thinking_text="Creating a todo list.",
+            ),
+            # Step 2: view file
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_004",
+                tool_input={
+                    "command": "view",
+                    "path": "/memories/todo.md",
+                },
+                thinking_text="Let me verify what I wrote.",
+            ),
+            # Step 3: done
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Created and verified the todo list.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Create a todo list and verify it was saved",
+            max_iterations=10,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        assert result.error is None
+
+        # File should exist with correct content
+        created_file = memory_dir / "todo.md"
+        assert created_file.exists()
+        assert "Fix bug #42" in created_file.read_text()
+
+    # -- Non-memory commands still go to environment ------------------------
+
+    def test_non_memory_commands_go_to_environment(self, bot):
+        """Regular shell commands should be dispatched to the environment, not the memory tool."""
+        self._mock_client.messages.create.side_effect = [
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="Let me check the files.",
+                command="ls -la",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Done.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "List the files",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        # The most recent environment.execute call should have been "ls -la"
+        bot.environment.execute.assert_called_with("ls -la")
+
+
+# ---------------------------------------------------------------------------
+# Real integration tests — require ANTHROPIC_API_KEY and Docker
+# ---------------------------------------------------------------------------
+
+MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool.
+You can save and retrieve notes using the memory tool.
+All your responses must be in this JSON format:
+{llm_output_format_str}
+The properties (task_done, thoughts, command) are mandatory on each response.
+When you are done, set task_done to true and command to an empty string.
+"""
+
+
+@pytest.mark.anthropic_integration
+@pytest.mark.docker
+class TestMemoryToolRealApi:
+    """End-to-end integration tests that hit the real Anthropic API.
+
+    These tests exercise the full MicroBot → AnthropicApi → memory tool
+    pipeline with no mocking.  A real Docker environment is created
+    (matching the AgentBoss integration test pattern).
+
+    Run with::
+
+        pytest -m anthropic_integration
+
+    Requires ``ANTHROPIC_API_KEY`` in ``.env`` and a working Docker setup.
+    """
+
+    @pytest.fixture()
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+
+    @pytest.fixture()
+    def memory_bot(self, memory_dir):
+        """Create a MicroBot with the real Anthropic API, real Docker env,
+        and a MemoryTool.  No mocking — fully end-to-end.
+        """
+        tool = MemoryTool(
+            memory_dir=str(memory_dir),
+            usage_instructions_to_llm="Use the memory tool to persist notes.",
+        )
+
+        anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5")
+
+        bot = MicroBot(
+            model=f"anthropic/{anthropic_deployment}",
+            system_prompt=MEMORY_SYSTEM_PROMPT,
+            additional_tools=[tool],
+        )
+
+        yield bot
+        del bot
+
+    def test_memory_tool_auto_upgraded(self, memory_bot):
+        """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool."""
+        memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+
+    def test_create_memory_file(self, memory_bot, memory_dir):
+        """MicroBot should persist a debugging plan to memory.
+
+        The LLM is expected to:
+          1. Receive a task about planning a debugging session.
+          2. Decide to persist the plan using the memory tool.
+          3. Confirm the task is done.
+
+        We verify the plan was actually written to disk.
+        """
+        result: BotRunResult = memory_bot.run(
+            task=(
+                "You are investigating a bug where the server returns HTTP 500 "
+                "on POST /api/users. Create a debugging plan that includes: "
+                "1) check server logs, 2) reproduce the request with curl, "
+                "3) inspect the database connection. "
+                "Persist this plan so you can resume later if interrupted."
+            ),
+            max_iterations=10,
+            timeout_in_seconds=60,
+        )
+
+        assert result.status is True, f"Task failed: {result.error}"
+        assert result.error is None
+
+        # The LLM should have used the memory tool to persist the plan
+        saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
+        assert len(saved_files) >= 1, (
+            f"Expected at least one file created in memory. "
+            f"Found: {saved_files}"
+        )
+        combined_content = "\n".join(f.read_text() for f in saved_files).lower()
+        assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, (
+            f"Expected debugging plan content in memory files. Content: {combined_content}"
+        )
+
+    def test_create_and_view_roundtrip(self, memory_bot, memory_dir):
+        """MicroBot should save findings and then review them before reporting.
+
+        The LLM is expected to:
+          1. Record analysis findings using the memory tool.
+          2. Review what it recorded to verify nothing was missed.
+          3. Summarize the findings in its final thoughts.
+
+        We verify:
+          - At least one file was written to disk.
+          - The LLM's summary references the recorded findings.
+        """
+        result: BotRunResult = memory_bot.run(
+            task=(
+                "You analyzed a Python project and found these issues: "
+                "1) an unused import 'os' in utils.py, "
+                "2) a missing null check in handler.py line 42. "
+                "Record these findings, then review your notes and "
+                "summarize what you found in your final thoughts."
+            ),
+            max_iterations=15,
+            timeout_in_seconds=60,
+        )
+
+        assert result.status is True, f"Task failed: {result.error}"
+        assert result.error is None
+
+        # The LLM should have created at least one memory file
+        saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
+        assert len(saved_files) >= 1, (
+            f"Expected at least one file in memory. "
+            f"Found: {list(memory_dir.rglob('*'))}"
+        )
+
+        result_lower = result.result.lower()
+        assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, (
+            f"LLM should have summarized the findings. Got: {result.result}"
+        )
diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py
index f5d1e98..459afd4 100644
--- a/test/bot/test_upgrade_tools_for_provider.py
+++ b/test/bot/test_upgrade_tools_for_provider.py
@@ -1,13 +1,7 @@
-"""
-Unit tests for MicroBot._upgrade_tools_for_provider.
+"""Unit tests for AnthropicApi.upgrade_tools() method.
 
 These tests verify that plain ``MemoryTool`` instances are automatically
-replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic,
-and that no changes are made for other providers or other tool types.
-
-All tests bypass the heavy MicroBot constructor (Docker environment, LLM
-creation) by constructing an uninitialized instance with ``object.__new__``
-and manually setting only the attributes the method under test needs.
+replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``.
 """
 import sys
 import os
@@ -17,8 +11,7 @@
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
 
-from microbots.MicroBot import MicroBot
-from microbots.constants import ModelProvider
+from microbots.llm.anthropic_api import AnthropicApi
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
 
@@ -27,15 +20,6 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
-def _bare_microbot(model_provider: str, tools: list) -> MicroBot:
-    """Return an uninitialized MicroBot with only the attributes that
-    ``_upgrade_tools_for_provider`` inspects."""
-    bot = object.__new__(MicroBot)
-    bot.model_provider = model_provider
-    bot.additional_tools = list(tools)
-    return bot
-
-
 def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool:
     return MemoryTool(
         memory_dir=str(tmp_path / "memory"),
@@ -50,112 +34,84 @@ def _memory_tool(tmp_path, instructions: str = "default instructions") -> Memory
 @pytest.mark.unit
 class TestUpgradeToolsForProvider:
 
-    # -- Anthropic provider: MemoryTool → AnthropicMemoryTool ---------------
+    @pytest.fixture(autouse=True)
+    def _create_api(self):
+        with patch("microbots.llm.anthropic_api.Anthropic"):
+            self.api = AnthropicApi(system_prompt="test")
+
+    # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool --------
 
     def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path):
         tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        assert len(bot.additional_tools) == 1
-        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+        assert len(upgraded) == 1
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
 
     def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path):
         mem_dir = str(tmp_path / "my_memory")
         tool = MemoryTool(memory_dir=mem_dir)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        upgraded = bot.additional_tools[0]
-        assert isinstance(upgraded, AnthropicMemoryTool)
-        assert str(upgraded.memory_dir) == mem_dir
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
+        assert str(upgraded[0].memory_dir) == mem_dir
 
     def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path):
         custom_instructions = "custom memory instructions for test"
         tool = _memory_tool(tmp_path, instructions=custom_instructions)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        upgraded = bot.additional_tools[0]
-        assert upgraded.usage_instructions_to_llm == custom_instructions
+        assert upgraded[0].usage_instructions_to_llm == custom_instructions
 
     def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path):
         existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([existing])
 
-        assert len(bot.additional_tools) == 1
-        assert bot.additional_tools[0] is existing
+        assert len(upgraded) == 1
+        assert upgraded[0] is existing
 
     def test_non_memory_tools_are_kept_unchanged(self, tmp_path):
         other_tool = Mock()
         other_tool.__class__ = Mock  # not a MemoryTool subclass
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([other_tool])
 
-        assert len(bot.additional_tools) == 1
-        assert bot.additional_tools[0] is other_tool
+        assert len(upgraded) == 1
+        assert upgraded[0] is other_tool
 
     def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path):
         plain_memory = _memory_tool(tmp_path)
         already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2"))
         other_tool = Mock(spec=[])
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool])
 
-        assert len(bot.additional_tools) == 3
+        assert len(upgraded) == 3
         # first: should have been upgraded
-        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
-        assert bot.additional_tools[0] is not plain_memory
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
+        assert upgraded[0] is not plain_memory
         # second: already AnthropicMemoryTool, untouched
-        assert bot.additional_tools[1] is already_upgraded
+        assert upgraded[1] is already_upgraded
         # third: non-memory tool, untouched
-        assert bot.additional_tools[2] is other_tool
+        assert upgraded[2] is other_tool
 
     def test_empty_tool_list_is_a_no_op(self):
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [])
-
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([])
 
-        assert bot.additional_tools == []
+        assert upgraded == []
 
     def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog):
         tool1 = _memory_tool(tmp_path)
         tmp_path2 = tmp_path / "sub"
         tmp_path2.mkdir()
         tool2 = _memory_tool(tmp_path2)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2])
 
-        with caplog.at_level(logging.INFO, logger=" MicroBot "):
-            bot._upgrade_tools_for_provider()
+        with caplog.at_level(logging.INFO):
+            self.api.upgrade_tools([tool1, tool2])
 
         upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message]
         assert len(upgrade_logs) == 2
-
-    # -- Non-Anthropic providers: no upgrade should happen ------------------
-
-    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
-    def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider):
-        tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(provider, [tool])
-
-        bot._upgrade_tools_for_provider()
-
-        assert len(bot.additional_tools) == 1
-        assert isinstance(bot.additional_tools[0], MemoryTool)
-        assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool)
-
-    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
-    def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider):
-        tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(provider, [tool])
-
-        bot._upgrade_tools_for_provider()
-
-        assert bot.additional_tools[0] is tool
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 49674aa..2c11966 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -540,63 +540,37 @@ def test_anthropic_api_clear_history_integration(self):
 # ============================================================================
 
 @pytest.mark.unit
-class TestAnthropicApiNativeToolsInit:
-    """Tests for __init__ native_tools caching."""
+class TestAnthropicApiToolDictsInit:
+    """Tests for __init__ tool upgrade and tool_dicts extraction."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
-    def test_init_without_native_tools_has_empty_caches(self):
+    def test_init_without_additional_tools_has_empty_tool_dicts(self):
         api = AnthropicApi(system_prompt="test")
 
-        assert api.native_tools == []
-        assert api._native_tool_dicts == []
-        assert api._native_tools_by_name == {}
+        assert api._tool_dicts == []
 
-    def test_init_with_none_native_tools_has_empty_caches(self):
-        api = AnthropicApi(system_prompt="test", native_tools=None)
+    def test_init_with_none_additional_tools_has_empty_tool_dicts(self):
+        api = AnthropicApi(system_prompt="test", additional_tools=None)
 
-        assert api._native_tool_dicts == []
-        assert api._native_tools_by_name == {}
+        assert api._tool_dicts == []
 
-    def test_init_with_single_native_tool_caches_dict(self):
+    def test_init_with_tool_having_to_dict_extracts_dicts(self):
         tool = Mock()
         tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+        # Ensure it's not a MemoryTool so upgrade_tools won't touch it
+        tool.__class__ = Mock
 
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+        api = AnthropicApi(system_prompt="test", additional_tools=[tool])
 
-        assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
+        assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
 
-    def test_init_with_single_native_tool_caches_by_name(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        assert "memory" in api._native_tools_by_name
-        assert api._native_tools_by_name["memory"] is tool
-
-    def test_init_with_multiple_native_tools_caches_all(self):
-        tool1 = Mock()
-        tool1.to_dict.return_value = {"name": "memory"}
-        tool2 = Mock()
-        tool2.to_dict.return_value = {"name": "bash"}
-
-        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
-
-        assert len(api._native_tool_dicts) == 2
-        assert api._native_tools_by_name["memory"] is tool1
-        assert api._native_tools_by_name["bash"] is tool2
-
-    def test_init_calls_to_dict_exactly_once_per_tool(self):
-        """to_dict() must not be called again on subsequent API calls."""
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-
-        AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_init_sets_pending_tool_response_to_none(self):
+        api = AnthropicApi(system_prompt="test")
 
-        assert tool.to_dict.call_count == 1
+        assert api._pending_tool_response is None
 
 
 @pytest.mark.unit
@@ -617,10 +591,10 @@ def test_call_api_without_tools_omits_tools_kwarg(self):
         call_kwargs = api.ai_client.messages.create.call_args[1]
         assert "tools" not in call_kwargs
 
-    def test_call_api_with_tools_passes_cached_dicts(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+    def test_call_api_with_tool_dicts_passes_them(self):
+        dicts = [{"name": "memory", "type": "memory_20250818"}]
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
+        api._tool_dicts = dicts
         api.messages = [{"role": "user", "content": "hello"}]
         api.ai_client.messages.create = Mock(return_value=Mock())
 
@@ -628,35 +602,17 @@ def test_call_api_with_tools_passes_cached_dicts(self):
 
         call_kwargs = api.ai_client.messages.create.call_args[1]
         assert "tools" in call_kwargs
-        assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}]
-
-    def test_call_api_does_not_call_to_dict_again(self):
-        """to_dict() should only be called during __init__, never during _call_api."""
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
-        api.messages = [{"role": "user", "content": "hello"}]
-        api.ai_client.messages.create = Mock(return_value=Mock())
-
-        count_after_init = tool.to_dict.call_count  # should be 1
-        api._call_api()
-        api._call_api()
-
-        assert tool.to_dict.call_count == count_after_init  # no increase
+        assert call_kwargs["tools"] == dicts
 
 
 @pytest.mark.unit
-class TestAnthropicApiDispatchToolUse:
-    """Tests for _dispatch_tool_use."""
+class TestAnthropicApiAppendToolResult:
+    """Tests for _append_tool_result."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
-    # ------------------------------------------------------------------ #
-    # Helpers
-    # ------------------------------------------------------------------ #
-
     @staticmethod
     def _tool_use_block(name, tool_id="tu_001", input_data=None):
         block = Mock()
@@ -675,31 +631,21 @@ def _text_block(text="hello"):
         block.model_dump.return_value = {"type": "text", "text": text}
         return block
 
-    # ------------------------------------------------------------------ #
-    # Tests
-    # ------------------------------------------------------------------ #
-
-    def test_dispatch_appends_assistant_message_first(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_appends_assistant_message_first(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [self._tool_use_block("memory")]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "ok")
 
         assert api.messages[0]["role"] == "assistant"
 
-    def test_dispatch_appends_tool_result_user_message(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "file listing"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_appends_tool_result_user_message(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [self._tool_use_block("memory", tool_id="tu_abc")]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "file listing")
 
         user_msg = api.messages[1]
         assert user_msg["role"] == "user"
@@ -707,100 +653,53 @@ def test_dispatch_appends_tool_result_user_message(self):
         assert user_msg["content"][0]["tool_use_id"] == "tu_abc"
         assert user_msg["content"][0]["content"] == "file listing"
 
-    def test_dispatch_calls_tool_with_correct_input(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        input_data = {"command": "view", "path": "/memories"}
-        response = Mock()
-        response.content = [self._tool_use_block("memory", input_data=input_data)]
-        api._dispatch_tool_use(response)
-
-        tool.call.assert_called_once_with(input_data)
-
-    def test_dispatch_unknown_tool_returns_error_in_result(self):
-        api = AnthropicApi(system_prompt="test")  # no native tools
-
-        response = Mock()
-        response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")]
-        api._dispatch_tool_use(response)
-
-        content = api.messages[1]["content"][0]["content"]
-        assert "Error" in content
-        assert "unknown_tool" in content
-
-    def test_dispatch_tool_exception_returns_error_message(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.side_effect = RuntimeError("disk full")
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        response = Mock()
-        response.content = [self._tool_use_block("memory", tool_id="tu_exc")]
-        api._dispatch_tool_use(response)
-
-        content = api.messages[1]["content"][0]["content"]
-        assert "Error" in content
-        assert "disk full" in content
-
-    def test_dispatch_skips_non_tool_use_content_blocks(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_skips_non_tool_use_content_blocks(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [
             self._text_block("thinking..."),
             self._tool_use_block("memory", tool_id="tu_only"),
         ]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "result")
 
         tool_results = api.messages[1]["content"]
         assert len(tool_results) == 1
         assert tool_results[0]["tool_use_id"] == "tu_only"
 
-    def test_dispatch_handles_multiple_tool_use_blocks(self):
-        tool1 = Mock()
-        tool1.to_dict.return_value = {"name": "memory"}
-        tool1.call.return_value = "memory result"
-        tool2 = Mock()
-        tool2.to_dict.return_value = {"name": "bash"}
-        tool2.call.return_value = "bash result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+    def test_handles_multiple_tool_use_blocks(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [
             self._tool_use_block("memory", tool_id="id_1"),
             self._tool_use_block("bash", tool_id="id_2"),
         ]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "combined result")
 
         results = api.messages[1]["content"]
         assert len(results) == 2
         assert results[0]["tool_use_id"] == "id_1"
-        assert results[0]["content"] == "memory result"
+        assert results[0]["content"] == "combined result"
         assert results[1]["tool_use_id"] == "id_2"
-        assert results[1]["content"] == "bash result"
+        assert results[1]["content"] == "combined result"
 
 
 @pytest.mark.unit
-class TestAnthropicApiAskWithToolUseLoop:
-    """Tests for ask() cycling through tool_use rounds before returning JSON."""
+class TestAnthropicApiAskWithToolUse:
+    """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
     @staticmethod
-    def _tool_use_response(tool_name, tool_id):
+    def _tool_use_response(tool_name, tool_id, input_data=None):
         block = Mock()
         block.type = "tool_use"
         block.name = tool_name
         block.id = tool_id
-        block.input = {}
+        block.input = input_data or {}
         block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name}
         response = Mock()
         response.stop_reason = "tool_use"
@@ -818,40 +717,61 @@ def _text_response(json_dict):
         response.content = [block]
         return response
 
-    def test_ask_dispatches_one_tool_use_round_then_returns(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "viewed /memories"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_returns_tool_use_as_ask_response(self):
+        dicts = [{"name": "memory", "type": "memory_20250818"}]
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = dicts
 
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+        tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"})
+        api.ai_client.messages.create = Mock(return_value=tool_resp)
 
         result = api.ask("do the task")
 
-        assert api.ai_client.messages.create.call_count == 2
-        tool.call.assert_called_once()
-        assert result.command == "ls /"
+        assert result.task_done is False
+        assert '"native_tool_calls"' in result.command
+        parsed = json.loads(result.command)
+        assert parsed["native_tool_calls"][0]["name"] == "memory"
+        assert parsed["native_tool_calls"][0]["id"] == "tu_1"
+        assert api._pending_tool_response is tool_resp
 
-    def test_ask_dispatches_multiple_tool_use_rounds(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_stores_pending_tool_response(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
 
-        tool_resp1 = self._tool_use_response("memory", "tu_1")
-        tool_resp2 = self._tool_use_response("memory", "tu_2")
-        final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp])
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        api.ai_client.messages.create = Mock(return_value=tool_resp)
 
-        result = api.ask("do the task")
+        api.ask("do it")
 
-        assert api.ai_client.messages.create.call_count == 3
-        assert tool.call.call_count == 2
-        assert result.task_done is True
+        assert api._pending_tool_response is tool_resp
 
-    def test_ask_without_tool_use_does_not_dispatch(self):
+    def test_ask_with_pending_tool_response_formats_tool_result(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        # First ask — returns tool_use
+        api.ask("do the task")
+
+        # Second ask — its message is wrapped as a tool_result for the pending tool_use
+        result = api.ask("viewed /memories")
+
+        assert result.command == "ls /"
+        assert api._pending_tool_response is None
+
+        # Check messages contain the tool_result
+        tool_result_msgs = [
+            m for m in api.messages
+            if m["role"] == "user" and isinstance(m["content"], list)
+        ]
+        assert len(tool_result_msgs) == 1
+        assert tool_result_msgs[0]["content"][0]["type"] == "tool_result"
+        assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1"
+
+    def test_ask_without_tool_use_works_normally(self):
         api = AnthropicApi(system_prompt="test")
 
         final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""})
@@ -861,23 +781,34 @@ def test_ask_without_tool_use_does_not_dispatch(self):
 
         assert api.ai_client.messages.create.call_count == 1
         assert result.command == "pwd"
+        assert api._pending_tool_response is None
 
-    def test_ask_tool_use_messages_are_added_to_history(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_extracts_thoughts_from_tool_use_response(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
 
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+        # Build a tool_use response with a text block for thoughts
+        text_block = Mock()
+        text_block.type = "text"
+        text_block.text = "Let me check memory first"
+        text_block.model_dump.return_value = {"type": "text", "text": text_block.text}
 
-        api.ask("do it")
+        tool_block = Mock()
+        tool_block.type = "tool_use"
+        tool_block.name = "memory"
+        tool_block.id = "tu_1"
+        tool_block.input = {}
+        tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"}
+
+        response = Mock()
+        response.stop_reason = "tool_use"
+        response.content = [text_block, tool_block]
+
+        api.ai_client.messages.create = Mock(return_value=response)
+
+        result = api.ask("do the task")
 
-        # Messages: user, assistant(tool_use), user(tool_result), assistant(final json)
-        roles = [m["role"] for m in api.messages]
-        assert roles.count("user") == 2
-        assert roles.count("assistant") == 2
+        assert result.thoughts == "Let me check memory first"
 
 
 if __name__ == "__main__":
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index db7069e..ab27a68 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -101,7 +101,19 @@ def test_returns_false_for_non_claude_models(self, tmp_path):
 @pytest.mark.unit
 class TestAnthropicMemoryToolIsInvoked:
 
-    def test_always_returns_false(self, tmp_path):
+    def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        import json
+        cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]})
+        assert tool.is_invoked(cmd) is True
+
+    def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        import json
+        cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]})
+        assert tool.is_invoked(cmd) is False
+
+    def test_returns_false_for_plain_commands(self, tmp_path):
         tool = make_tool(tmp_path)
         for cmd in ("memory view /memories", "memory clear", "anything", ""):
             assert tool.is_invoked(cmd) is False

From c7d2657a448580e706c7e9d4b203e4ae19ccb056 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 06:15:00 +0000
Subject: [PATCH 09/10] Enhance MemoryTool path validation and logging

---
 .../tools/tool_definitions/__init__.py        |  2 +
 .../tools/tool_definitions/memory_tool.py     | 17 ++++--
 test/llm/test_llm.py                          | 30 +++++++++-
 .../test_anthropic_memory_tool.py             | 55 +++++++++++++++++++
 4 files changed, 98 insertions(+), 6 deletions(-)
 create mode 100644 src/microbots/tools/tool_definitions/__init__.py

diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py
new file mode 100644
index 0000000..88acf0d
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/__init__.py
@@ -0,0 +1,2 @@
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
\ No newline at end of file
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 310f880..b63f3ff 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import shlex
 import shutil
 from pathlib import Path
@@ -110,19 +111,22 @@ def is_model_supported(self, model_name: str) -> bool:
     def _resolve(self, path: str) -> Path:
         """Resolve a /memories/… path to an absolute host path."""
         stripped = path.lstrip("/")
+
+        # Reject any path containing '..' components before resolving
+        if ".." in Path(stripped).parts:
+            raise ValueError(f"Path traversal not allowed: {path!r}")
+
         if stripped == "memories":
             rel = ""
         elif stripped.startswith("memories/"):
             rel = stripped[len("memories/"):]
-        elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")):
-            raise ValueError(
-                f"Invalid memory path: {path!r}. Use paths under /memories/."
-            )
         else:
             rel = stripped  # treat as relative to memory_dir
 
         resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve()
-        if not str(resolved).startswith(str(self._memory_dir.resolve())):
+        # Use trailing separator to prevent prefix confusion with sibling dirs
+        memory_root = str(self._memory_dir.resolve())
+        if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep):
             raise ValueError(f"Path traversal not allowed: {path!r}")
         return resolved
 
@@ -184,6 +188,7 @@ def _view(self, args: list) -> CmdReturn:
             elif args[i] == "--end" and i + 1 < len(args):
                 end_line = int(args[i + 1]); i += 2
             else:
+                logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i])
                 i += 1
 
         resolved = self._resolve(path)
@@ -232,6 +237,7 @@ def _str_replace(self, args: list) -> CmdReturn:
             elif args[i] == "--new" and i + 1 < len(args):
                 new_text = args[i + 1]; i += 2
             else:
+                logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i])
                 i += 1
         if old_text is None or new_text is None:
             return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
@@ -259,6 +265,7 @@ def _insert(self, args: list) -> CmdReturn:
             elif args[i] == "--text" and i + 1 < len(args):
                 text = args[i + 1]; i += 2
             else:
+                logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i])
                 i += 1
         if line_num is None or text is None:
             return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py
index bd1b82d..af5a7d2 100644
--- a/test/llm/test_llm.py
+++ b/test/llm/test_llm.py
@@ -742,4 +742,32 @@ def test_task_done_true_with_not_none_command_field(self, llm):
         assert llm_response is None
         assert llm.retries == 1
         assert len(llm.messages) == 1
-        assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
\ No newline at end of file
+
+
+@pytest.mark.unit
+class TestUpgradeToolsDefault:
+    """Tests for the default LLMInterface.upgrade_tools no-op implementation."""
+
+    @pytest.fixture
+    def llm(self):
+        return ConcreteLLM()
+
+    def test_returns_same_list(self, llm):
+        """Default upgrade_tools returns the input list unchanged."""
+        tools = ["tool_a", "tool_b"]
+        result = llm.upgrade_tools(tools)
+        assert result is tools
+
+    def test_empty_list(self, llm):
+        """Default upgrade_tools handles an empty list."""
+        tools = []
+        result = llm.upgrade_tools(tools)
+        assert result == []
+
+    def test_preserves_tool_order_and_identity(self, llm):
+        """Default upgrade_tools does not reorder or copy elements."""
+        sentinel = object()
+        tools = [sentinel, "other"]
+        result = llm.upgrade_tools(tools)
+        assert result[0] is sentinel
+        assert result[1] == "other"
\ No newline at end of file
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index ab27a68..81db12c 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -364,3 +364,58 @@ def test_rename_raises_runtime_error_on_failure(self, tmp_path):
         )
         with pytest.raises(RuntimeError):
             tool.rename(cmd)
+
+
+# ---------------------------------------------------------------------------
+# invoke — non-memory tool calls are skipped; call() errors are reported in stdout
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInvoke:
+
+    def test_invoke_skips_non_memory_tool_calls(self, tmp_path):
+        """The ``if tc["name"] != "memory": continue`` branch is exercised
+        when native_tool_calls contains a non-memory tool."""
+        import json
+        from unittest.mock import Mock
+
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello")
+
+        command = json.dumps({
+            "native_tool_calls": [
+                {"name": "bash", "id": "tu_1", "input": {"command": "ls"}},
+                {"name": "memory", "id": "tu_2", "input": {
+                    "command": "view", "path": "/memories/f.md", "view_range": None,
+                }},
+            ]
+        })
+
+        result = tool.invoke(command, parent_bot=Mock())
+
+        assert result.return_code == 0
+        # Only the memory call should produce output; bash should be skipped
+        assert "hello" in result.stdout
+
+    def test_invoke_catches_exception_from_tool_call(self, tmp_path):
+        """The ``except Exception`` branch is exercised when tool.call() raises."""
+        import json
+        from unittest.mock import Mock, patch
+
+        tool = make_tool(tmp_path)
+
+        command = json.dumps({
+            "native_tool_calls": [
+                {"name": "memory", "id": "tu_1", "input": {
+                    "command": "view", "path": "/memories/nonexistent.md", "view_range": None,
+                }},
+            ]
+        })
+
+        # Force call() to raise an exception
+        with patch.object(tool, "call", side_effect=RuntimeError("boom")):
+            result = tool.invoke(command, parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert "Error executing tool 'memory'" in result.stdout
+        assert "boom" in result.stdout

From 3067b59e77f40197fc0c70ae101205a2dd13ec61 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 07:39:00 +0000
Subject: [PATCH 10/10] Enhance path validation

---
 src/microbots/tools/tool_definitions/memory_tool.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index b63f3ff..8131b8b 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -101,9 +101,6 @@ def __post_init__(self):
         self._memory_dir = base
         self._memory_dir.mkdir(parents=True, exist_ok=True)
 
-    def is_model_supported(self, model_name: str) -> bool:
-        return True
-
     # ---------------------------------------------------------------------- #
     # Path helpers
     # ---------------------------------------------------------------------- #
@@ -116,6 +113,11 @@ def _resolve(self, path: str) -> Path:
         if ".." in Path(stripped).parts:
             raise ValueError(f"Path traversal not allowed: {path!r}")
 
+        if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"):
+            raise ValueError(
+                f"Invalid memory path: {path!r}. Use paths under /memories/."
+            )
+
         if stripped == "memories":
             rel = ""
         elif stripped.startswith("memories/"):