From 1c9b80920ce01333af12c6cd910fc7ab1bc91c89 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 3 Mar 2026 10:37:45 +0000
Subject: [PATCH 01/20] Implement generic Memory Tool and Anthropic-native
 memory tool

---
 src/microbots/MicroBot.py                     |  41 ++-
 src/microbots/llm/anthropic_api.py            | 123 ++++++-
 .../tool_definitions/anthropic_memory_tool.py | 184 +++++++++++
 .../tools/tool_definitions/memory_tool.py     | 312 ++++++++++++++++++
 4 files changed, 647 insertions(+), 13 deletions(-)
 create mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py
 create mode 100644 src/microbots/tools/tool_definitions/memory_tool.py

diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py
index 3ace5a4..7b061d7 100644
--- a/src/microbots/MicroBot.py
+++ b/src/microbots/MicroBot.py
@@ -317,7 +317,38 @@ def _create_environment(self, folder_to_mount: Optional[Mount]):
             folder_to_mount=folder_to_mount,
         )
 
+    def _upgrade_tools_for_provider(self):
+        """Auto-upgrade provider-agnostic tools to their provider-optimised variants.
+
+        Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when
+        the provider is Anthropic so the model gets native structured tool-use
+        instead of the text-command loop.  The ``memory_dir`` and any custom
+        ``usage_instructions_to_llm`` are forwarded to the upgraded instance.
+        """
+        if self.model_provider != ModelProvider.ANTHROPIC:
+            return
+
+        # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths
+        from microbots.tools.tool_definitions.memory_tool import MemoryTool
+        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+        upgraded = []
+        for tool in self.additional_tools:
+            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
+                logger.info(
+                    "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider"
+                )
+                upgraded.append(AnthropicMemoryTool(
+                    memory_dir=tool.memory_dir,
+                    usage_instructions=tool.usage_instructions_to_llm,
+                ))
+            else:
+                upgraded.append(tool)
+        self.additional_tools = upgraded
+
     def _create_llm(self):
+        self._upgrade_tools_for_provider()
+
         # Append tool usage instructions to system prompt
         system_prompt_with_tools = self.system_prompt if self.system_prompt else ""
         if self.additional_tools:
@@ -334,8 +365,16 @@ def _create_llm(self):
                 system_prompt=system_prompt_with_tools, model_name=self.deployment_name
             )
         elif self.model_provider == ModelProvider.ANTHROPIC:
+            # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing:
+            # any tool that exposes both to_dict() and call() is a native Anthropic tool.
+            native_tools = [
+                t for t in self.additional_tools
+                if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None))
+            ]
             self.llm = AnthropicApi(
-                system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name
+                system_prompt=system_prompt_with_tools,
+                deployment_name=self.deployment_name,
+                native_tools=native_tools or None,
             )
         # No Else case required as model provider is already validated using _validate_model_and_provider
 
diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index f40118a..4a73ba5 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -1,7 +1,9 @@
 import json
 import os
+import re
 from dataclasses import asdict
 from logging import getLogger
+from typing import List, Optional
 
 from dotenv import load_dotenv
 from anthropic import Anthropic
@@ -16,9 +18,31 @@
 api_key = os.getenv("ANTHROPIC_API_KEY")
 
 
+
 class AnthropicApi(LLMInterface):
 
-    def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3):
+    def __init__(
+        self,
+        system_prompt: str,
+        deployment_name: str = deployment_name,
+        max_retries: int = 3,
+        native_tools: Optional[List] = None,
+    ):
+        """
+        Parameters
+        ----------
+        system_prompt : str
+            System prompt for the LLM.
+        deployment_name : str
+            The Anthropic model deployment name.
+        max_retries : int
+            Maximum number of retries for invalid LLM responses.
+        native_tools : Optional[List]
+            Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that
+            have both a ``to_dict()`` and a ``call()`` method.  These are passed
+            directly to the API and their tool-use blocks are dispatched here
+            before the JSON response is returned to the caller.
+        """
         self.ai_client = Anthropic(
             api_key=api_key,
             base_url=endpoint
@@ -26,31 +50,106 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3
         self.deployment_name = deployment_name
         self.system_prompt = system_prompt
         self.messages = []
+        self.native_tools = native_tools or []
+        # Cache tool dicts once so _call_api and _dispatch_tool_use don't
+        # re-serialise on every invocation (important when multiple native
+        # tools are registered, e.g. memory + bash).
+        self._native_tool_dicts = [t.to_dict() for t in self.native_tools]
+        self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)}
 
         # Set these values here. This logic will be handled in the parent class.
         self.max_retries = max_retries
         self.retries = 0
 
-    def ask(self, message) -> LLMAskResponse:
+    # ---------------------------------------------------------------------- #
+    # Internal helpers
+    # ---------------------------------------------------------------------- #
+
+    def _call_api(self) -> object:
+        """Call the Anthropic messages API, including native tools when present."""
+        kwargs = dict(
+            model=self.deployment_name,
+            system=self.system_prompt,
+            messages=self.messages,
+            max_tokens=4096,
+        )
+
+        if self.native_tools:
+            kwargs["tools"] = self._native_tool_dicts
+
+        return self.ai_client.messages.create(**kwargs)
+
+    def _dispatch_tool_use(self, response) -> None:
+        """Handle a tool_use response: execute each tool call and append results.
+
+        Mutates ``self.messages`` in place — appends the assistant turn (with
+        all content blocks) and the corresponding tool_result user turn.
+        """
+        # Append the full assistant message as-is (content is a list of blocks)
+        assistant_content = [block.model_dump() for block in response.content]
+        self.messages.append({"role": "assistant", "content": assistant_content})
+
+        # Build tool_result entries for every tool_use block
+        tool_results = []
+        for block in response.content:
+            if block.type != "tool_use":
+                continue
+
+            # Find the matching native tool by name
+            tool = self._native_tools_by_name.get(block.name)
+            if tool is None:
+                result_text = f"Error: unknown tool '{block.name}'"
+                logger.error("Received tool_use for unknown tool: %s", block.name)
+            else:
+                try:
+                    result_text = tool.call(block.input)
+                    logger.info(
+                        "🧠 Native tool '%s' executed. Result (first 200 chars): %s",
+                        block.name,
+                        str(result_text)[:200],
+                    )
+                except Exception as exc:
+                    result_text = f"Error executing tool '{block.name}': {exc}"
+                    logger.error("Native tool '%s' raised: %s", block.name, exc)
+
+            tool_results.append({
+                "type": "tool_result",
+                "tool_use_id": block.id,
+                "content": str(result_text),
+            })
+
+        self.messages.append({"role": "user", "content": tool_results})
+
+    # ---------------------------------------------------------------------- #
+    # Public interface
+    # ---------------------------------------------------------------------- #
+
+    def ask(self, message: str) -> LLMAskResponse:
         self.retries = 0  # reset retries for each ask. Handled in parent class.
 
         self.messages.append({"role": "user", "content": message})
 
         valid = False
         while not valid:
-            response = self.ai_client.messages.create(
-                model=self.deployment_name,
-                system=self.system_prompt,
-                messages=self.messages,
-                max_tokens=4096,
-            )
-
-            # Extract text content from response
-            response_text = response.content[0].text if response.content else ""
+            response = self._call_api()
+
+            # Dispatch any tool_use rounds before looking for a JSON response.
+            # The model may call the memory tool multiple times before producing
+            # its final JSON command.
+            while response.stop_reason == "tool_use":
+                self._dispatch_tool_use(response)
+                response = self._call_api()
+
+            # Extract text content from the final response
+            response_text = ""
+            for block in response.content:
+                if block.type == "text":
+                    response_text = block.text
+                    break
+
             logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500])
 
             # Try to extract JSON if wrapped in markdown code blocks
-            import re
             json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
             if json_match:
                 response_text = json_match.group(1)
diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
new file mode 100644
index 0000000..e80750c
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -0,0 +1,184 @@
+"""
+AnthropicMemoryTool — wraps Anthropic's memory tool.
+
+The memory tool lets the model persist information across conversations by
+reading and writing files in a local memory directory.  When the model invokes
+the tool, it sends a command (view, create, str_replace, insert, delete,
+rename) and the client executes it against a local filesystem directory.
+
+This implementation extends both:
+  - ``MemoryTool``:  provides all file-operation logic (_resolve, _view,
+    _create, _str_replace, _insert, _delete, _rename, _clear) and the
+    ToolAbstract duck-typing interface.
+  - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and
+    the ``to_dict()`` / ``call()`` interface required by AnthropicApi.
+
+The SDK command-handler overrides (view, create, str_replace, insert, delete,
+rename) simply translate SDK command objects → arg lists and delegate to the
+inherited MemoryTool private methods, converting the CmdReturn back to a
+string as the SDK expects.
+
+The memory tool (type ``memory_20250818``) is available in the standard
+Anthropic library and does not require a beta endpoint or header.  Pass it
+via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular
+``client.messages.create(...)`` call.  ``AnthropicApi`` handles this
+automatically when ``native_tools`` contains an ``AnthropicMemoryTool``.
+
+Usage:
+    from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+    memory = AnthropicMemoryTool()
+    bot = ReadingBot(..., additional_tools=[memory])
+"""
+
+from __future__ import annotations
+
+from logging import getLogger
+from pathlib import Path
+
+from typing_extensions import override
+
+from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool
+from anthropic.types.beta import (
+    BetaMemoryTool20250818CreateCommand,
+    BetaMemoryTool20250818DeleteCommand,
+    BetaMemoryTool20250818InsertCommand,
+    BetaMemoryTool20250818RenameCommand,
+    BetaMemoryTool20250818StrReplaceCommand,
+    BetaMemoryTool20250818ViewCommand,
+)
+
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+
+logger = getLogger(__name__)
+
+DEFAULT_MEMORY_INSTRUCTIONS = (
+    "MEMORY PROTOCOL:\n"
+    "1. ALWAYS view your memory directory BEFORE doing anything else "
+    "using the `view` command of your `memory` tool to check for earlier progress.\n"
+    "2. As you make progress on the task, record status, progress, "
+    "and key findings in your memory using the memory tool.\n"
+    "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, "
+    "so you risk losing any progress that is not recorded in your memory directory.\n"
+    "4. Before completing a task, always save your final results and analysis to memory.\n"
+    "5. When editing your memory folder, always keep its content up-to-date, coherent "
+    "and organized. Rename or delete files that are no longer relevant. "
+    "Do not create new files unless necessary.\n\n"
+    "IMPORTANT: The memory tool ONLY works with paths under /memories/. "
+    "Do NOT use the memory tool to access the repository or workdir. "
+    "Use shell commands (ls, cat, etc.) for filesystem access."
+)
+
+
+class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool):
+    """
+    Anthropic's built-in memory tool, backed by MemoryTool's file logic.
+
+    Inherits file-operation logic from ``MemoryTool`` (plain Python class) and
+    the SDK's native dispatch interface from ``BetaAbstractMemoryTool``.
+
+    The SDK command-handler overrides delegate to the inherited private methods
+    (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to
+    the ``args: list`` format that those methods expect, and converting the
+    returned ``CmdReturn`` to the string that the SDK API requires.
+
+    Parameters
+    ----------
+    memory_dir : str | Path | None
+        Root directory for memory files.  Defaults to ``~/.microbots/memory``.
+    usage_instructions : str | None
+        Custom instructions appended to the system prompt for the LLM.
+        Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``.
+    """
+
+    def __init__(
+        self,
+        memory_dir: str | Path | None = None,
+        usage_instructions: str | None = None,
+    ) -> None:
+        MemoryTool.__init__(
+            self,
+            memory_dir=str(memory_dir) if memory_dir else None,
+            usage_instructions_to_llm=(
+                usage_instructions
+                if usage_instructions is not None
+                else DEFAULT_MEMORY_INSTRUCTIONS
+            ),
+        )
+        _SDKMemoryTool.__init__(self)  # type: ignore[call-arg]
+
+    # ---------------------------------------------------------------------- #
+    # ToolAbstract duck-typing overrides
+    # ---------------------------------------------------------------------- #
+
+    def is_model_supported(self, model_name: str) -> bool:
+        """Only Anthropic (Claude) models support the native memory tool."""
+        return "claude" in model_name.lower()
+
+    def is_invoked(self, command: str) -> bool:
+        """Return False — this tool is dispatched natively by AnthropicApi,
+        not via the shell command loop."""
+        return False
+
+    def clear_all(self) -> None:
+        """Delete all memory files (useful for testing or resetting state)."""
+        self._clear()
+        logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir)
+
+    # ---------------------------------------------------------------------- #
+    # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods
+    # ---------------------------------------------------------------------- #
+
+    @override
+    def clear_all_memory(self) -> str:
+        self.clear_all()
+        return "All memory cleared"
+
+    @override
+    def view(self, command: BetaMemoryTool20250818ViewCommand) -> str:
+        args = [command.path]
+        if command.view_range:
+            args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])]
+        result = self._view(args)
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
+
+    @override
+    def create(self, command: BetaMemoryTool20250818CreateCommand) -> str:
+        result = self._create([command.path, command.file_text])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"File created successfully at {command.path}"
+
+    @override
+    def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str:
+        result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"File {command.path} has been edited"
+
+    @override
+    def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str:
+        result = self._insert([
+            command.path,
+            "--line", str(command.insert_line),
+            "--text", command.insert_text,
+        ])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return f"Text inserted at line {command.insert_line} in {command.path}"
+
+    @override
+    def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str:
+        result = self._delete([command.path])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
+
+    @override
+    def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str:
+        result = self._rename([command.old_path, command.new_path])
+        if result.return_code != 0:
+            raise RuntimeError(result.stderr)
+        return result.stdout
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
new file mode 100644
index 0000000..912fe6d
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -0,0 +1,312 @@
+import logging
+import shlex
+import shutil
+from pathlib import Path
+from typing import Optional
+
+from pydantic.dataclasses import dataclass, Field
+
+from microbots.environment.Environment import CmdReturn
+from microbots.tools.external_tool import ExternalTool
+
+logger = logging.getLogger(" 🧠 MemoryTool")
+
+INSTRUCTIONS_TO_LLM = """
+Use this tool to persist information to files across steps — same interface as
+the Anthropic memory tool.  All paths must be under /memories/.
+
+MEMORY PROTOCOL:
+1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for
+   earlier progress.
+2. Record status, findings and intermediate results as you go.
+3. Before completing a task, save your final results to memory.
+4. Keep the memory folder organised — rename or delete stale files.
+
+## Commands
+
+View a file or list a directory:
+  memory view <path>
+  memory view <path> --start <line> --end <line>
+
+Create a file:
+  memory create <path> <content>
+
+Replace a unique string in a file:
+  memory str_replace <path> --old "<old_text>" --new "<new_text>"
+
+Insert a line into a file (0 = prepend):
+  memory insert <path> --line <line_number> --text "<text>"
+
+Delete a file or directory:
+  memory delete <path>
+
+Rename / move a file:
+  memory rename <old_path> <new_path>
+
+Clear all memory:
+  memory clear
+
+## Examples
+
+  memory view /memories
+  memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42"
+  memory str_replace /memories/progress.md --old "line 42" --new "line 45"
+  memory insert /memories/progress.md --line 0 --text "# Task Notes"
+  memory view /memories/progress.md --start 1 --end 10
+  memory delete /memories/old_notes.md
+  memory rename /memories/draft.md /memories/final.md
+
+## Notes
+- Paths must start with /memories/.
+- memory create overwrites if the file already exists.
+- memory str_replace requires the old text to appear exactly once.
+"""
+
+
+@dataclass
+class MemoryTool(ExternalTool):
+    """
+    File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface
+    but dispatches through the text command loop (compatible with all providers).
+
+    Subclass of ``ExternalTool`` — all command lists are empty so
+    ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and
+    ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``.
+
+    All files are stored under ``memory_dir`` on the host (default
+    ``~/.microbots/memory``).  The LLM uses paths like ``/memories/notes.md``
+    which are resolved relative to ``memory_dir``.
+
+    Supported subcommands
+    ---------------------
+    memory view <path> [--start N] [--end N]
+    memory create <path> <content>
+    memory str_replace <path> --old <text> --new <text>
+    memory insert <path> --line N --text <text>
+    memory delete <path>
+    memory rename <old> <new>
+    memory clear
+    """
+
+    name: str = Field(default="memory")
+    description: str = Field(
+        default="File-backed memory store — view, create, edit, delete files under /memories/."
+    )
+    usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM)
+    memory_dir: Optional[str] = Field(default=None)
+
+    def __post_init__(self):
+        base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory"
+        self._memory_dir = base
+        self._memory_dir.mkdir(parents=True, exist_ok=True)
+
+    def is_model_supported(self, model_name: str) -> bool:
+        return True
+
+    # ---------------------------------------------------------------------- #
+    # Path helpers
+    # ---------------------------------------------------------------------- #
+
+    def _resolve(self, path: str) -> Path:
+        """Resolve a /memories/… path to an absolute host path."""
+        stripped = path.lstrip("/")
+        if stripped == "memories":
+            rel = ""
+        elif stripped.startswith("memories/"):
+            rel = stripped[len("memories/"):]
+        elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")):
+            raise ValueError(
+                f"Invalid memory path: {path!r}. Use paths under /memories/."
+            )
+        else:
+            rel = stripped  # treat as relative to memory_dir
+
+        resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve()
+        if not str(resolved).startswith(str(self._memory_dir.resolve())):
+            raise ValueError(f"Path traversal not allowed: {path!r}")
+        return resolved
+
+    # ---------------------------------------------------------------------- #
+    # ToolAbstract interface
+    # ---------------------------------------------------------------------- #
+
+    def is_invoked(self, command: str) -> bool:
+        return command.strip().startswith("memory ")
+
+    def invoke(self, command: str, parent_bot) -> CmdReturn:
+        try:
+            tokens = shlex.split(command)
+        except ValueError as exc:
+            return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1)
+
+        if len(tokens) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory <subcommand> ...", return_code=1)
+
+        sub = tokens[1]
+        args = tokens[2:]
+
+        try:
+            if sub == "view":
+                return self._view(args)
+            elif sub == "create":
+                return self._create(args)
+            elif sub == "str_replace":
+                return self._str_replace(args)
+            elif sub == "insert":
+                return self._insert(args)
+            elif sub == "delete":
+                return self._delete(args)
+            elif sub == "rename":
+                return self._rename(args)
+            elif sub == "clear":
+                return self._clear()
+            else:
+                return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1)
+        except (ValueError, FileNotFoundError, RuntimeError) as exc:
+            logger.error("🧠 MemoryTool error: %s", exc)
+            return CmdReturn(stdout="", stderr=str(exc), return_code=1)
+
+    # ---------------------------------------------------------------------- #
+    # Subcommand handlers
+    # ---------------------------------------------------------------------- #
+
+    def _view(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory view <path> [--start N] [--end N]", return_code=1)
+
+        path = args[0]
+        start_line = None
+        end_line = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--start" and i + 1 < len(args):
+                start_line = int(args[i + 1]); i += 2
+            elif args[i] == "--end" and i + 1 < len(args):
+                end_line = int(args[i + 1]); i += 2
+            else:
+                i += 1
+
+        resolved = self._resolve(path)
+        if not resolved.exists():
+            raise RuntimeError(f"Path not found: {path!r}")
+
+        if resolved.is_dir():
+            items = [
+                (f"{item.name}/" if item.is_dir() else item.name)
+                for item in sorted(resolved.iterdir())
+                if not item.name.startswith(".")
+            ]
+            result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items)
+            return CmdReturn(stdout=result, stderr="", return_code=0)
+
+        lines = resolved.read_text(encoding="utf-8").splitlines()
+        if start_line is not None or end_line is not None:
+            s = max(0, (start_line or 1) - 1)
+            e = len(lines) if (end_line is None or end_line == -1) else end_line
+            lines = lines[s:e]
+            base_num = s + 1
+        else:
+            base_num = 1
+        numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines))
+        return CmdReturn(stdout=numbered, stderr="", return_code=0)
+
+    def _create(self, args: list) -> CmdReturn:
+        if len(args) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory create <path> <content>", return_code=1)
+        path, content = args[0], args[1]
+        resolved = self._resolve(path)
+        resolved.parent.mkdir(parents=True, exist_ok=True)
+        resolved.write_text(content, encoding="utf-8")
+        logger.info("🧠 Memory file created: %s", path)
+        return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0)
+
+    def _str_replace(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory str_replace <path> --old <text> --new <text>", return_code=1)
+        path = args[0]
+        old_text = new_text = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--old" and i + 1 < len(args):
+                old_text = args[i + 1]; i += 2
+            elif args[i] == "--new" and i + 1 < len(args):
+                new_text = args[i + 1]; i += 2
+            else:
+                i += 1
+        if old_text is None or new_text is None:
+            return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
+        resolved = self._resolve(path)
+        if not resolved.is_file():
+            raise FileNotFoundError(f"File not found: {path!r}")
+        content = resolved.read_text(encoding="utf-8")
+        count = content.count(old_text)
+        if count == 0:
+            raise ValueError(f"Text not found in {path!r}")
+        if count > 1:
+            raise ValueError(f"Text appears {count} times in {path!r} — must be unique")
+        resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8")
+        return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0)
+
+    def _insert(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory insert <path> --line N --text <text>", return_code=1)
+        path = args[0]
+        line_num = text = None
+        i = 1
+        while i < len(args):
+            if args[i] == "--line" and i + 1 < len(args):
+                line_num = int(args[i + 1]); i += 2
+            elif args[i] == "--text" and i + 1 < len(args):
+                text = args[i + 1]; i += 2
+            else:
+                i += 1
+        if line_num is None or text is None:
+            return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
+        resolved = self._resolve(path)
+        if not resolved.is_file():
+            raise FileNotFoundError(f"File not found: {path!r}")
+        lines = resolved.read_text(encoding="utf-8").splitlines()
+        if line_num < 0 or line_num > len(lines):
+            raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.")
+        lines.insert(line_num, text.rstrip("\n"))
+        resolved.write_text("\n".join(lines) + "\n", encoding="utf-8")
+        return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0)
+
+    def _delete(self, args: list) -> CmdReturn:
+        if not args:
+            return CmdReturn(stdout="", stderr="Usage: memory delete <path>", return_code=1)
+        path = args[0]
+        if path.rstrip("/") in ("/memories", "memories", ""):
+            raise ValueError("Cannot delete the /memories root directory")
+        resolved = self._resolve(path)
+        if resolved.is_file():
+            resolved.unlink()
+            logger.info("🧠 Memory file deleted: %s", path)
+            return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0)
+        if resolved.is_dir():
+            shutil.rmtree(resolved)
+            logger.info("🧠 Memory directory deleted: %s", path)
+            return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0)
+        raise FileNotFoundError(f"Path not found: {path!r}")
+
+    def _rename(self, args: list) -> CmdReturn:
+        if len(args) < 2:
+            return CmdReturn(stdout="", stderr="Usage: memory rename <old_path> <new_path>", return_code=1)
+        old_path, new_path = args[0], args[1]
+        old_resolved = self._resolve(old_path)
+        new_resolved = self._resolve(new_path)
+        if not old_resolved.exists():
+            raise FileNotFoundError(f"Source not found: {old_path!r}")
+        if new_resolved.exists():
+            raise ValueError(f"Destination already exists: {new_path!r}")
+        new_resolved.parent.mkdir(parents=True, exist_ok=True)
+        old_resolved.rename(new_resolved)
+        logger.info("🧠 Memory renamed: %s → %s", old_path, new_path)
+        return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0)
+
+    def _clear(self) -> CmdReturn:
+        if self._memory_dir.exists():
+            shutil.rmtree(self._memory_dir)
+            self._memory_dir.mkdir(parents=True, exist_ok=True)
+        logger.info("🧠 Memory cleared.")
+        return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0)

From 315cc03ad359947624f5ebdbc09efed0af094ae4 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 3 Mar 2026 10:51:30 +0000
Subject: [PATCH 02/20] Refactor MemoryTool error handling to return CmdReturn
 instead of raising exceptions and add tests

---
 .../tools/tool_definitions/memory_tool.py     |  20 +-
 test/llm/test_anthropic_api.py                | 367 ++++++++++++++
 .../tool_definitions/test_memory_tool.py      | 449 ++++++++++++++++++
 3 files changed, 826 insertions(+), 10 deletions(-)
 create mode 100644 test/tools/tool_definitions/test_memory_tool.py

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 912fe6d..310f880 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -188,7 +188,7 @@ def _view(self, args: list) -> CmdReturn:
 
         resolved = self._resolve(path)
         if not resolved.exists():
-            raise RuntimeError(f"Path not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
 
         if resolved.is_dir():
             items = [
@@ -237,13 +237,13 @@ def _str_replace(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
         resolved = self._resolve(path)
         if not resolved.is_file():
-            raise FileNotFoundError(f"File not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
         content = resolved.read_text(encoding="utf-8")
         count = content.count(old_text)
         if count == 0:
-            raise ValueError(f"Text not found in {path!r}")
+            return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1)
         if count > 1:
-            raise ValueError(f"Text appears {count} times in {path!r} — must be unique")
+            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1)
         resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8")
         return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0)
 
@@ -264,10 +264,10 @@ def _insert(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
         resolved = self._resolve(path)
         if not resolved.is_file():
-            raise FileNotFoundError(f"File not found: {path!r}")
+            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
         lines = resolved.read_text(encoding="utf-8").splitlines()
         if line_num < 0 or line_num > len(lines):
-            raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.")
+            return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1)
         lines.insert(line_num, text.rstrip("\n"))
         resolved.write_text("\n".join(lines) + "\n", encoding="utf-8")
         return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0)
@@ -277,7 +277,7 @@ def _delete(self, args: list) -> CmdReturn:
             return CmdReturn(stdout="", stderr="Usage: memory delete <path>", return_code=1)
         path = args[0]
         if path.rstrip("/") in ("/memories", "memories", ""):
-            raise ValueError("Cannot delete the /memories root directory")
+            return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
         resolved = self._resolve(path)
         if resolved.is_file():
             resolved.unlink()
@@ -287,7 +287,7 @@ def _delete(self, args: list) -> CmdReturn:
             shutil.rmtree(resolved)
             logger.info("🧠 Memory directory deleted: %s", path)
             return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0)
-        raise FileNotFoundError(f"Path not found: {path!r}")
+        return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
 
     def _rename(self, args: list) -> CmdReturn:
         if len(args) < 2:
@@ -296,9 +296,9 @@ def _rename(self, args: list) -> CmdReturn:
         old_resolved = self._resolve(old_path)
         new_resolved = self._resolve(new_path)
         if not old_resolved.exists():
-            raise FileNotFoundError(f"Source not found: {old_path!r}")
+            return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1)
         if new_resolved.exists():
-            raise ValueError(f"Destination already exists: {new_path!r}")
+            return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1)
         new_resolved.parent.mkdir(parents=True, exist_ok=True)
         old_resolved.rename(new_resolved)
         logger.info("🧠 Memory renamed: %s → %s", old_path, new_path)
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 674294c..49674aa 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -98,7 +98,9 @@ def test_ask_successful_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo 'hello'",
@@ -133,7 +135,9 @@ def test_ask_with_task_done_true(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": True,
             "command": "",
@@ -157,12 +161,16 @@ def test_ask_with_retry_on_invalid_response(self):
 
         # Mock the Anthropic client to return invalid then valid response
         mock_invalid_response = Mock()
+        mock_invalid_response.stop_reason = "end_turn"
         mock_invalid_content = Mock()
+        mock_invalid_content.type = "text"
         mock_invalid_content.text = "invalid json"
         mock_invalid_response.content = [mock_invalid_content]
 
         mock_valid_response = Mock()
+        mock_valid_response.stop_reason = "end_turn"
         mock_valid_content = Mock()
+        mock_valid_content.type = "text"
         mock_valid_content.text = json.dumps({
             "task_done": False,
             "command": "ls -la",
@@ -193,7 +201,9 @@ def test_ask_appends_user_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
@@ -218,7 +228,9 @@ def test_ask_appends_assistant_response_as_json(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo test",
@@ -247,7 +259,9 @@ def test_ask_uses_asdict_for_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         response_dict = {
             "task_done": True,
             "command": "",
@@ -277,7 +291,9 @@ def test_ask_resets_retries_to_zero(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "ls",
@@ -299,7 +315,9 @@ def test_ask_extracts_json_from_markdown(self):
 
         # Mock response with markdown-wrapped JSON
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = """Here's the response:
 ```json
 {
@@ -420,7 +438,9 @@ def test_ask_with_empty_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo ''",
@@ -443,7 +463,9 @@ def test_multiple_ask_calls_append_messages(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
+        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
+        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
@@ -513,6 +535,351 @@ def test_anthropic_api_clear_history_integration(self):
         assert len(api.messages) == 0  # Anthropic doesn't store system in messages
 
 
+# ============================================================================
+# Tests for native_tools support (new changes)
+# ============================================================================
+
+@pytest.mark.unit
+class TestAnthropicApiNativeToolsInit:
+    """Tests for __init__ native_tools caching."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    def test_init_without_native_tools_has_empty_caches(self):
+        api = AnthropicApi(system_prompt="test")
+
+        assert api.native_tools == []
+        assert api._native_tool_dicts == []
+        assert api._native_tools_by_name == {}
+
+    def test_init_with_none_native_tools_has_empty_caches(self):
+        api = AnthropicApi(system_prompt="test", native_tools=None)
+
+        assert api._native_tool_dicts == []
+        assert api._native_tools_by_name == {}
+
+    def test_init_with_single_native_tool_caches_dict(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
+
+    def test_init_with_single_native_tool_caches_by_name(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert "memory" in api._native_tools_by_name
+        assert api._native_tools_by_name["memory"] is tool
+
+    def test_init_with_multiple_native_tools_caches_all(self):
+        tool1 = Mock()
+        tool1.to_dict.return_value = {"name": "memory"}
+        tool2 = Mock()
+        tool2.to_dict.return_value = {"name": "bash"}
+
+        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+
+        assert len(api._native_tool_dicts) == 2
+        assert api._native_tools_by_name["memory"] is tool1
+        assert api._native_tools_by_name["bash"] is tool2
+
+    def test_init_calls_to_dict_exactly_once_per_tool(self):
+        """to_dict() must not be called again on subsequent API calls."""
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+
+        AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        assert tool.to_dict.call_count == 1
+
+
+@pytest.mark.unit
+class TestAnthropicApiCallApiWithTools:
+    """Tests for _call_api including/excluding the tools kwarg."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    def test_call_api_without_tools_omits_tools_kwarg(self):
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        api._call_api()
+
+        call_kwargs = api.ai_client.messages.create.call_args[1]
+        assert "tools" not in call_kwargs
+
+    def test_call_api_with_tools_passes_cached_dicts(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        api._call_api()
+
+        call_kwargs = api.ai_client.messages.create.call_args[1]
+        assert "tools" in call_kwargs
+        assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}]
+
+    def test_call_api_does_not_call_to_dict_again(self):
+        """to_dict() should only be called during __init__, never during _call_api."""
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+        api.messages = [{"role": "user", "content": "hello"}]
+        api.ai_client.messages.create = Mock(return_value=Mock())
+
+        count_after_init = tool.to_dict.call_count  # should be 1
+        api._call_api()
+        api._call_api()
+
+        assert tool.to_dict.call_count == count_after_init  # no increase
+
+
+@pytest.mark.unit
+class TestAnthropicApiDispatchToolUse:
+    """Tests for _dispatch_tool_use."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    # ------------------------------------------------------------------ #
+    # Helpers
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _tool_use_block(name, tool_id="tu_001", input_data=None):
+        block = Mock()
+        block.type = "tool_use"
+        block.name = name
+        block.id = tool_id
+        block.input = input_data or {}
+        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name}
+        return block
+
+    @staticmethod
+    def _text_block(text="hello"):
+        block = Mock()
+        block.type = "text"
+        block.text = text
+        block.model_dump.return_value = {"type": "text", "text": text}
+        return block
+
+    # ------------------------------------------------------------------ #
+    # Tests
+    # ------------------------------------------------------------------ #
+
+    def test_dispatch_appends_assistant_message_first(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory")]
+        api._dispatch_tool_use(response)
+
+        assert api.messages[0]["role"] == "assistant"
+
+    def test_dispatch_appends_tool_result_user_message(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "file listing"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory", tool_id="tu_abc")]
+        api._dispatch_tool_use(response)
+
+        user_msg = api.messages[1]
+        assert user_msg["role"] == "user"
+        assert user_msg["content"][0]["type"] == "tool_result"
+        assert user_msg["content"][0]["tool_use_id"] == "tu_abc"
+        assert user_msg["content"][0]["content"] == "file listing"
+
+    def test_dispatch_calls_tool_with_correct_input(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        input_data = {"command": "view", "path": "/memories"}
+        response = Mock()
+        response.content = [self._tool_use_block("memory", input_data=input_data)]
+        api._dispatch_tool_use(response)
+
+        tool.call.assert_called_once_with(input_data)
+
+    def test_dispatch_unknown_tool_returns_error_in_result(self):
+        api = AnthropicApi(system_prompt="test")  # no native tools
+
+        response = Mock()
+        response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")]
+        api._dispatch_tool_use(response)
+
+        content = api.messages[1]["content"][0]["content"]
+        assert "Error" in content
+        assert "unknown_tool" in content
+
+    def test_dispatch_tool_exception_returns_error_message(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.side_effect = RuntimeError("disk full")
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [self._tool_use_block("memory", tool_id="tu_exc")]
+        api._dispatch_tool_use(response)
+
+        content = api.messages[1]["content"][0]["content"]
+        assert "Error" in content
+        assert "disk full" in content
+
+    def test_dispatch_skips_non_tool_use_content_blocks(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        response = Mock()
+        response.content = [
+            self._text_block("thinking..."),
+            self._tool_use_block("memory", tool_id="tu_only"),
+        ]
+        api._dispatch_tool_use(response)
+
+        tool_results = api.messages[1]["content"]
+        assert len(tool_results) == 1
+        assert tool_results[0]["tool_use_id"] == "tu_only"
+
+    def test_dispatch_handles_multiple_tool_use_blocks(self):
+        tool1 = Mock()
+        tool1.to_dict.return_value = {"name": "memory"}
+        tool1.call.return_value = "memory result"
+        tool2 = Mock()
+        tool2.to_dict.return_value = {"name": "bash"}
+        tool2.call.return_value = "bash result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+
+        response = Mock()
+        response.content = [
+            self._tool_use_block("memory", tool_id="id_1"),
+            self._tool_use_block("bash", tool_id="id_2"),
+        ]
+        api._dispatch_tool_use(response)
+
+        results = api.messages[1]["content"]
+        assert len(results) == 2
+        assert results[0]["tool_use_id"] == "id_1"
+        assert results[0]["content"] == "memory result"
+        assert results[1]["tool_use_id"] == "id_2"
+        assert results[1]["content"] == "bash result"
+
+
+@pytest.mark.unit
+class TestAnthropicApiAskWithToolUseLoop:
+    """Tests for ask() cycling through tool_use rounds before returning JSON."""
+
+    @pytest.fixture(autouse=True)
+    def _use_patch(self, patch_anthropic_config):
+        pass
+
+    @staticmethod
+    def _tool_use_response(tool_name, tool_id):
+        block = Mock()
+        block.type = "tool_use"
+        block.name = tool_name
+        block.id = tool_id
+        block.input = {}
+        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name}
+        response = Mock()
+        response.stop_reason = "tool_use"
+        response.content = [block]
+        return response
+
+    @staticmethod
+    def _text_response(json_dict):
+        block = Mock()
+        block.type = "text"
+        block.text = json.dumps(json_dict)
+        block.model_dump.return_value = {"type": "text", "text": block.text}
+        response = Mock()
+        response.stop_reason = "end_turn"
+        response.content = [block]
+        return response
+
+    def test_ask_dispatches_one_tool_use_round_then_returns(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "viewed /memories"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        result = api.ask("do the task")
+
+        assert api.ai_client.messages.create.call_count == 2
+        tool.call.assert_called_once()
+        assert result.command == "ls /"
+
+    def test_ask_dispatches_multiple_tool_use_rounds(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "ok"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp1 = self._tool_use_response("memory", "tu_1")
+        tool_resp2 = self._tool_use_response("memory", "tu_2")
+        final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp])
+
+        result = api.ask("do the task")
+
+        assert api.ai_client.messages.create.call_count == 3
+        assert tool.call.call_count == 2
+        assert result.task_done is True
+
+    def test_ask_without_tool_use_does_not_dispatch(self):
+        api = AnthropicApi(system_prompt="test")
+
+        final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""})
+        api.ai_client.messages.create = Mock(return_value=final_resp)
+
+        result = api.ask("where am I?")
+
+        assert api.ai_client.messages.create.call_count == 1
+        assert result.command == "pwd"
+
+    def test_ask_tool_use_messages_are_added_to_history(self):
+        tool = Mock()
+        tool.to_dict.return_value = {"name": "memory"}
+        tool.call.return_value = "result"
+        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        api.ask("do it")
+
+        # Messages: user, assistant(tool_use), user(tool_result), assistant(final json)
+        roles = [m["role"] for m in api.messages]
+        assert roles.count("user") == 2
+        assert roles.count("assistant") == 2
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
 
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
new file mode 100644
index 0000000..2c317f6
--- /dev/null
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -0,0 +1,449 @@
+"""
+Unit tests for MemoryTool — file-backed memory store.
+
+All tests use pytest's tmp_path fixture so they are isolated from the
+user's real ~/.microbots/memory directory.
+"""
+import sys
+import os
+import pytest
+from pathlib import Path
+from unittest.mock import Mock
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src")))
+
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.environment.Environment import CmdReturn
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_tool(tmp_path: Path) -> MemoryTool:
+    """Return a MemoryTool whose memory_dir lives under tmp_path."""
+    return MemoryTool(memory_dir=str(tmp_path / "memory"))
+
+
+# ---------------------------------------------------------------------------
+# Initialisation
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInit:
+
+    def test_memory_dir_is_created_on_init(self, tmp_path):
+        mem_dir = tmp_path / "memory"
+        assert not mem_dir.exists()
+
+        make_tool(tmp_path)
+
+        assert mem_dir.exists()
+        assert mem_dir.is_dir()
+
+    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
+        """When no memory_dir is given it falls back to ~/.microbots/memory."""
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
+        tool = MemoryTool()
+        assert tool._memory_dir == tmp_path / ".microbots" / "memory"
+
+
+# ---------------------------------------------------------------------------
+# is_invoked
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolIsInvoked:
+
+    def test_returns_true_for_memory_commands(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("memory view /memories") is True
+        assert tool.is_invoked("memory create /memories/f.md hello") is True
+
+    def test_returns_false_for_other_commands(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("ls -la") is False
+        assert tool.is_invoked("cat file.txt") is False
+        assert tool.is_invoked("") is False
+
+    def test_strips_leading_whitespace(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("  memory view /memories") is True
+
+
+# ---------------------------------------------------------------------------
+# Path resolution (_resolve)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolResolve:
+
+    def test_resolve_memories_root(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool._resolve("/memories") == tool._memory_dir.resolve()
+
+    def test_resolve_memories_subpath(self, tmp_path):
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("/memories/notes.md")
+        assert resolved == (tool._memory_dir / "notes.md").resolve()
+
+    def test_resolve_rejects_path_traversal(self, tmp_path):
+        tool = make_tool(tmp_path)
+        with pytest.raises(ValueError, match="Path traversal"):
+            tool._resolve("/memories/../../etc/passwd")
+
+    def test_resolve_rejects_non_memory_paths(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for bad in ("/workdir/file", "/home/user/file", "/tmp/file"):
+            with pytest.raises(ValueError):
+                tool._resolve(bad)
+
+
+# ---------------------------------------------------------------------------
+# _view
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolView:
+
+    def test_view_directory_lists_contents(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "notes.md").write_text("hello")
+        (tool._memory_dir / "sub").mkdir()
+
+        result = tool._view(["/memories"])
+
+        assert result.return_code == 0
+        assert "notes.md" in result.stdout
+        assert "sub/" in result.stdout
+
+    def test_view_file_returns_numbered_lines(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n")
+
+        result = tool._view(["/memories/f.md"])
+
+        assert result.return_code == 0
+        assert "1:" in result.stdout
+        assert "line1" in result.stdout
+        assert "3:" in result.stdout
+
+    def test_view_file_with_line_range(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")
+
+        result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"])
+
+        assert result.return_code == 0
+        assert "b" in result.stdout
+        assert "d" in result.stdout
+        assert "a" not in result.stdout
+        assert "e" not in result.stdout
+
+    def test_view_nonexistent_path_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._view(["/memories/nonexistent.md"])
+
+        assert result.return_code != 0
+
+    def test_view_no_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._view([])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _create
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolCreate:
+
+    def test_create_writes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._create(["/memories/notes.md", "hello world"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "notes.md").read_text() == "hello world"
+
+    def test_create_overwrites_existing_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("old content")
+
+        result = tool._create(["/memories/f.md", "new content"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "new content"
+
+    def test_create_creates_parent_directories(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool._create(["/memories/sub/dir/f.md", "content"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "sub" / "dir" / "f.md").exists()
+
+    def test_create_missing_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._create(["/memories/f.md"])  # missing content
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _str_replace
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolStrReplace:
+
+    def test_str_replace_replaces_unique_text(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"])
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_str_replace_fails_when_text_not_found(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"])
+
+        assert result.return_code != 0
+        assert "not found" in result.stderr.lower()
+
+    def test_str_replace_fails_when_text_not_unique(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello hello")
+
+        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"])
+
+        assert result.return_code != 0
+        assert "2" in result.stderr  # appears N times
+
+    def test_str_replace_missing_flags_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("text")
+        result = tool._str_replace(["/memories/f.md"])
+        assert result.return_code != 0
+
+    def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _insert
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInsert:
+
+    def test_insert_prepends_at_line_zero(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"])
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+        assert lines[1] == "line1"
+
+    def test_insert_at_end_of_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"])
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[-1] == "appended"
+
+    def test_insert_invalid_line_number_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\n")
+
+        result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"])
+
+        assert result.return_code != 0
+
+    def test_insert_nonexistent_file_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"])
+        assert result.return_code != 0
+
+    def test_insert_missing_flags_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\n")
+        result = tool._insert(["/memories/f.md"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _delete
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolDelete:
+
+    def test_delete_removes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        f = tool._memory_dir / "f.md"
+        f.write_text("data")
+
+        result = tool._delete(["/memories/f.md"])
+
+        assert result.return_code == 0
+        assert not f.exists()
+
+    def test_delete_removes_directory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        sub = tool._memory_dir / "sub"
+        sub.mkdir()
+        (sub / "f.md").write_text("data")
+
+        result = tool._delete(["/memories/sub"])
+
+        assert result.return_code == 0
+        assert not sub.exists()
+
+    def test_delete_prevents_root_deletion(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for path in ("/memories", "memories", "/memories/"):
+            result = tool._delete([path])
+            assert result.return_code != 0
+
+    def test_delete_nonexistent_path_raises(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._delete(["/memories/nonexistent.md"])
+        assert result.return_code != 0
+
+    def test_delete_no_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._delete([])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _rename
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolRename:
+
+    def test_rename_moves_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        src = tool._memory_dir / "old.md"
+        src.write_text("content")
+
+        result = tool._rename(["/memories/old.md", "/memories/new.md"])
+
+        assert result.return_code == 0
+        assert not src.exists()
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+
+    def test_rename_nonexistent_source_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._rename(["/memories/missing.md", "/memories/new.md"])
+        assert result.return_code != 0
+
+    def test_rename_fails_if_destination_exists(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        result = tool._rename(["/memories/a.md", "/memories/b.md"])
+
+        assert result.return_code != 0
+
+    def test_rename_missing_args_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool._rename(["/memories/a.md"])
+        assert result.return_code != 0
+
+
+# ---------------------------------------------------------------------------
+# _clear
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolClear:
+
+    def test_clear_removes_all_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        result = tool._clear()
+
+        assert result.return_code == 0
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_clear_leaves_memory_dir_intact(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        tool._clear()
+
+        assert tool._memory_dir.exists()
+
+
+# ---------------------------------------------------------------------------
+# invoke — full command dispatch
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolInvoke:
+
+    def test_invoke_view_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello")
+
+        result = tool.invoke("memory view /memories/f.md", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert "hello" in result.stdout
+
+    def test_invoke_create_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+
+        result = tool.invoke('memory create /memories/n.md "some content"', parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "n.md").read_text() == "some content"
+
+    def test_invoke_clear_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        result = tool.invoke("memory clear", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_invoke_unknown_subcommand_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock())
+        assert result.return_code != 0
+        assert "Unknown subcommand" in result.stderr
+
+    def test_invoke_too_few_tokens_returns_error(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke("memory", parent_bot=Mock())
+        assert result.return_code != 0
+
+    def test_invoke_handles_bad_quoting_gracefully(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.invoke('memory create /memories/f.md "unclosed', parent_bot=Mock())
+        assert result.return_code != 0
+        assert "Parse error" in result.stderr
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

From 0260226a00118678c240951d5e66611b4cb8e0a1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Mar 2026 07:48:13 +0000
Subject: [PATCH 03/20] Initial plan


From ae8e01ef1941a529f3f4a6cc496ed2cf284e1869 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 3 Mar 2026 07:49:29 +0000
Subject: [PATCH 04/20] Add workflow_dispatch trigger to dockerBuildPush
 workflow

Co-authored-by: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com>
---
 .github/workflows/dockerBuildPush.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml
index 08341d0..ae10709 100644
--- a/.github/workflows/dockerBuildPush.yml
+++ b/.github/workflows/dockerBuildPush.yml
@@ -1,6 +1,7 @@
 name: Build and Push to Docker Hub
 
 on:
+  workflow_dispatch:
   push:
     paths:
       - 'src/microbots/environment/local_docker/image_builder/Dockerfile'

From abba9026ae427110e0479f5f329f30cd7b8eea5a Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 08:22:40 +0000
Subject: [PATCH 05/20] Modify comments in AnthropicMemoryTool implementation

---
 .../tools/tool_definitions/anthropic_memory_tool.py         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
index e80750c..9cf547e 100644
--- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -8,8 +8,8 @@
 
 This implementation extends both:
   - ``MemoryTool``:  provides all file-operation logic (_resolve, _view,
-    _create, _str_replace, _insert, _delete, _rename, _clear) and the
-    ToolAbstract duck-typing interface.
+    _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies
+    the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.).
   - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and
     the ``to_dict()`` / ``call()`` interface required by AnthropicApi.
 
@@ -108,7 +108,7 @@ def __init__(
         _SDKMemoryTool.__init__(self)  # type: ignore[call-arg]
 
     # ---------------------------------------------------------------------- #
-    # ToolAbstract duck-typing overrides
+    # ToolAbstract overrides
     # ---------------------------------------------------------------------- #
 
     def is_model_supported(self, model_name: str) -> bool:

From 213aa9e70bd249df48a74e1e956930ea14a76656 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 09:01:00 +0000
Subject: [PATCH 06/20] Add unit tests for MemoryTool and AnthropicMemoryTool
 functionality

---
 test/bot/test_upgrade_tools_for_provider.py   | 161 ++++++++
 .../test_anthropic_memory_tool.py             | 347 ++++++++++++++++++
 .../tool_definitions/test_memory_tool.py      | 104 ++++++
 3 files changed, 612 insertions(+)
 create mode 100644 test/bot/test_upgrade_tools_for_provider.py
 create mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py

diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py
new file mode 100644
index 0000000..f5d1e98
--- /dev/null
+++ b/test/bot/test_upgrade_tools_for_provider.py
@@ -0,0 +1,161 @@
+"""
+Unit tests for MicroBot._upgrade_tools_for_provider.
+
+These tests verify that plain ``MemoryTool`` instances are automatically
+replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic,
+and that no changes are made for other providers or other tool types.
+
+All tests bypass the heavy MicroBot constructor (Docker environment, LLM
+creation) by constructing an uninitialized instance with ``object.__new__``
+and manually setting only the attributes the method under test needs.
+"""
+import sys
+import os
+import logging
+import pytest
+from unittest.mock import patch, Mock
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
+
+from microbots.MicroBot import MicroBot
+from microbots.constants import ModelProvider
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _bare_microbot(model_provider: str, tools: list) -> MicroBot:
+    """Return an uninitialized MicroBot with only the attributes that
+    ``_upgrade_tools_for_provider`` inspects."""
+    bot = object.__new__(MicroBot)
+    bot.model_provider = model_provider
+    bot.additional_tools = list(tools)
+    return bot
+
+
+def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool:
+    return MemoryTool(
+        memory_dir=str(tmp_path / "memory"),
+        usage_instructions_to_llm=instructions,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestUpgradeToolsForProvider:
+
+    # -- Anthropic provider: MemoryTool → AnthropicMemoryTool ---------------
+
+    def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+
+    def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path):
+        mem_dir = str(tmp_path / "my_memory")
+        tool = MemoryTool(memory_dir=mem_dir)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        upgraded = bot.additional_tools[0]
+        assert isinstance(upgraded, AnthropicMemoryTool)
+        assert str(upgraded.memory_dir) == mem_dir
+
+    def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path):
+        custom_instructions = "custom memory instructions for test"
+        tool = _memory_tool(tmp_path, instructions=custom_instructions)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        upgraded = bot.additional_tools[0]
+        assert upgraded.usage_instructions_to_llm == custom_instructions
+
+    def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path):
+        existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert bot.additional_tools[0] is existing
+
+    def test_non_memory_tools_are_kept_unchanged(self, tmp_path):
+        other_tool = Mock()
+        other_tool.__class__ = Mock  # not a MemoryTool subclass
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert bot.additional_tools[0] is other_tool
+
+    def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path):
+        plain_memory = _memory_tool(tmp_path)
+        already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2"))
+        other_tool = Mock(spec=[])
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 3
+        # first: should have been upgraded
+        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+        assert bot.additional_tools[0] is not plain_memory
+        # second: already AnthropicMemoryTool, untouched
+        assert bot.additional_tools[1] is already_upgraded
+        # third: non-memory tool, untouched
+        assert bot.additional_tools[2] is other_tool
+
+    def test_empty_tool_list_is_a_no_op(self):
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [])
+
+        bot._upgrade_tools_for_provider()
+
+        assert bot.additional_tools == []
+
+    def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog):
+        tool1 = _memory_tool(tmp_path)
+        tmp_path2 = tmp_path / "sub"
+        tmp_path2.mkdir()
+        tool2 = _memory_tool(tmp_path2)
+        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2])
+
+        with caplog.at_level(logging.INFO, logger=" MicroBot "):
+            bot._upgrade_tools_for_provider()
+
+        upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message]
+        assert len(upgrade_logs) == 2
+
+    # -- Non-Anthropic providers: no upgrade should happen ------------------
+
+    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
+    def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(provider, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert len(bot.additional_tools) == 1
+        assert isinstance(bot.additional_tools[0], MemoryTool)
+        assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+
+    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
+    def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider):
+        tool = _memory_tool(tmp_path)
+        bot = _bare_microbot(provider, [tool])
+
+        bot._upgrade_tools_for_provider()
+
+        assert bot.additional_tools[0] is tool
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
new file mode 100644
index 0000000..6668bfc
--- /dev/null
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -0,0 +1,347 @@
+"""
+Unit tests for AnthropicMemoryTool.
+
+Covers:
+  - __init__: memory_dir / usage_instructions forwarding and defaults
+  - is_model_supported
+  - is_invoked
+  - clear_all / clear_all_memory (SDK override)
+  - SDK overrides: view, create, str_replace, insert, delete, rename
+    (happy-path + RuntimeError on failure)
+"""
+import logging
+import pytest
+
+from anthropic.types.beta import (
+    BetaMemoryTool20250818CreateCommand,
+    BetaMemoryTool20250818DeleteCommand,
+    BetaMemoryTool20250818InsertCommand,
+    BetaMemoryTool20250818RenameCommand,
+    BetaMemoryTool20250818StrReplaceCommand,
+    BetaMemoryTool20250818ViewCommand,
+)
+
+from microbots.tools.tool_definitions.anthropic_memory_tool import (
+    DEFAULT_MEMORY_INSTRUCTIONS,
+    AnthropicMemoryTool,
+)
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_tool(tmp_path) -> AnthropicMemoryTool:
+    return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
+
+
+# ---------------------------------------------------------------------------
+# __init__
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInit:
+
+    def test_is_subclass_of_memory_tool(self, tmp_path):
+        assert isinstance(make_tool(tmp_path), MemoryTool)
+
+    def test_memory_dir_is_forwarded(self, tmp_path):
+        mem_dir = str(tmp_path / "my_memory")
+        tool = AnthropicMemoryTool(memory_dir=mem_dir)
+        assert str(tool._memory_dir) == mem_dir
+
+    def test_memory_dir_is_created_on_init(self, tmp_path):
+        mem_dir = tmp_path / "new_memory"
+        assert not mem_dir.exists()
+        AnthropicMemoryTool(memory_dir=str(mem_dir))
+        assert mem_dir.exists()
+
+    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
+        from pathlib import Path
+        monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
+        tool = AnthropicMemoryTool()
+        assert tool._memory_dir == tmp_path / ".microbots" / "memory"
+
+    def test_custom_usage_instructions_are_stored(self, tmp_path):
+        custom = "custom instructions"
+        tool = AnthropicMemoryTool(
+            memory_dir=str(tmp_path / "memory"),
+            usage_instructions=custom,
+        )
+        assert tool.usage_instructions_to_llm == custom
+
+    def test_default_usage_instructions_are_applied_when_none(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS
+
+
+# ---------------------------------------------------------------------------
+# is_model_supported
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolIsModelSupported:
+
+    def test_returns_true_for_claude_models(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"):
+            assert tool.is_model_supported(model) is True
+
+    def test_returns_false_for_non_claude_models(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""):
+            assert tool.is_model_supported(model) is False
+
+
+# ---------------------------------------------------------------------------
+# is_invoked
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolIsInvoked:
+
+    def test_always_returns_false(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for cmd in ("memory view /memories", "memory clear", "anything", ""):
+            assert tool.is_invoked(cmd) is False
+
+
+# ---------------------------------------------------------------------------
+# clear_all / clear_all_memory
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolClearAll:
+
+    def test_clear_all_removes_all_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "a.md").write_text("a")
+        (tool._memory_dir / "b.md").write_text("b")
+
+        tool.clear_all()
+
+        assert list(tool._memory_dir.iterdir()) == []
+
+    def test_clear_all_leaves_memory_dir_intact(self, tmp_path):
+        tool = make_tool(tmp_path)
+        tool.clear_all()
+        assert tool._memory_dir.exists()
+
+    def test_clear_all_logs_info(self, tmp_path, caplog):
+        tool = make_tool(tmp_path)
+        with caplog.at_level(logging.INFO):
+            tool.clear_all()
+        assert "AnthropicMemoryTool" in caplog.text
+        assert "cleared" in caplog.text
+
+    def test_clear_all_memory_returns_string(self, tmp_path):
+        tool = make_tool(tmp_path)
+        result = tool.clear_all_memory()
+        assert result == "All memory cleared"
+
+    def test_clear_all_memory_removes_files(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        tool.clear_all_memory()
+
+        assert list(tool._memory_dir.iterdir()) == []
+
+
+# ---------------------------------------------------------------------------
+# view (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolView:
+
+    def test_view_returns_file_contents(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "notes.md").write_text("hello\nworld\n")
+
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/notes.md", view_range=None
+        )
+        result = tool.view(cmd)
+
+        assert "hello" in result
+        assert "world" in result
+
+    def test_view_with_view_range(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")
+
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/f.md", view_range=[2, 4]
+        )
+        result = tool.view(cmd)
+
+        assert "b" in result
+        assert "d" in result
+        assert "a" not in result
+        assert "e" not in result
+
+    def test_view_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818ViewCommand(
+            command="view", path="/memories/nonexistent.md", view_range=None
+        )
+        with pytest.raises(RuntimeError):
+            tool.view(cmd)
+
+
+# ---------------------------------------------------------------------------
+# create (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolCreate:
+
+    def test_create_writes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818CreateCommand(
+            command="create", path="/memories/new.md", file_text="hello world"
+        )
+        result = tool.create(cmd)
+
+        assert "new.md" in result
+        assert (tool._memory_dir / "new.md").read_text() == "hello world"
+
+    def test_create_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        # Path traversal should cause _create to fail via _resolve
+        cmd = BetaMemoryTool20250818CreateCommand(
+            command="create", path="/memories/../../etc/evil.md", file_text="x"
+        )
+        with pytest.raises((RuntimeError, ValueError)):
+            tool.create(cmd)
+
+
+# ---------------------------------------------------------------------------
+# str_replace (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolStrReplace:
+
+    def test_str_replace_edits_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        cmd = BetaMemoryTool20250818StrReplaceCommand(
+            command="str_replace",
+            path="/memories/f.md",
+            old_str="hello",
+            new_str="goodbye",
+        )
+        result = tool.str_replace(cmd)
+
+        assert "f.md" in result
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_str_replace_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+        cmd = BetaMemoryTool20250818StrReplaceCommand(
+            command="str_replace",
+            path="/memories/f.md",
+            old_str="not present",
+            new_str="x",
+        )
+        with pytest.raises(RuntimeError):
+            tool.str_replace(cmd)
+
+
+# ---------------------------------------------------------------------------
+# insert (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInsert:
+
+    def test_insert_prepends_line(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        cmd = BetaMemoryTool20250818InsertCommand(
+            command="insert",
+            path="/memories/f.md",
+            insert_line=0,
+            insert_text="prepended",
+        )
+        result = tool.insert(cmd)
+
+        assert "0" in result or "prepended" in result or "f.md" in result
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
+    def test_insert_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818InsertCommand(
+            command="insert",
+            path="/memories/missing.md",
+            insert_line=0,
+            insert_text="x",
+        )
+        with pytest.raises(RuntimeError):
+            tool.insert(cmd)
+
+
+# ---------------------------------------------------------------------------
+# delete (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolDelete:
+
+    def test_delete_removes_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("data")
+
+        cmd = BetaMemoryTool20250818DeleteCommand(
+            command="delete", path="/memories/f.md"
+        )
+        tool.delete(cmd)
+
+        assert not (tool._memory_dir / "f.md").exists()
+
+    def test_delete_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818DeleteCommand(
+            command="delete", path="/memories/nonexistent.md"
+        )
+        with pytest.raises(RuntimeError):
+            tool.delete(cmd)
+
+
+# ---------------------------------------------------------------------------
+# rename (SDK override)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolRename:
+
+    def test_rename_moves_file(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "old.md").write_text("content")
+
+        cmd = BetaMemoryTool20250818RenameCommand(
+            command="rename",
+            old_path="/memories/old.md",
+            new_path="/memories/new.md",
+        )
+        tool.rename(cmd)
+
+        assert not (tool._memory_dir / "old.md").exists()
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+
+    def test_rename_raises_runtime_error_on_failure(self, tmp_path):
+        tool = make_tool(tmp_path)
+        cmd = BetaMemoryTool20250818RenameCommand(
+            command="rename",
+            old_path="/memories/missing.md",
+            new_path="/memories/new.md",
+        )
+        with pytest.raises(RuntimeError):
+            tool.rename(cmd)
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 2c317f6..5679d5e 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -98,6 +98,20 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path):
             with pytest.raises(ValueError):
                 tool._resolve(bad)
 
+    def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path):
+        """The else branch: a path without a /memories/ prefix is resolved
+        relative to memory_dir."""
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("notes.md")
+        assert resolved == (tool._memory_dir / "notes.md").resolve()
+
+    def test_resolve_bare_relative_subdir_path(self, tmp_path):
+        """A bare relative path with subdirectory components is also resolved
+        relative to memory_dir (else branch)."""
+        tool = make_tool(tmp_path)
+        resolved = tool._resolve("sub/dir/file.md")
+        assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve()
+
 
 # ---------------------------------------------------------------------------
 # _view
@@ -231,6 +245,13 @@ def test_str_replace_missing_flags_returns_error(self, tmp_path):
         result = tool._str_replace(["/memories/f.md"])
         assert result.return_code != 0
 
+    def test_str_replace_empty_args_returns_usage_error(self, tmp_path):
+        """if not args branch: calling _str_replace([]) returns the usage message."""
+        tool = make_tool(tmp_path)
+        result = tool._str_replace([])
+        assert result.return_code == 1
+        assert "Usage: memory str_replace" in result.stderr
+
     def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
@@ -284,6 +305,13 @@ def test_insert_missing_flags_returns_error(self, tmp_path):
         result = tool._insert(["/memories/f.md"])
         assert result.return_code != 0
 
+    def test_insert_empty_args_returns_usage_error(self, tmp_path):
+        """if not args branch: calling _insert([]) returns the usage message."""
+        tool = make_tool(tmp_path)
+        result = tool._insert([])
+        assert result.return_code == 1
+        assert "Usage: memory insert" in result.stderr
+
 
 # ---------------------------------------------------------------------------
 # _delete
@@ -394,6 +422,19 @@ def test_clear_leaves_memory_dir_intact(self, tmp_path):
         assert tool._memory_dir.exists()
 
 
+# ---------------------------------------------------------------------------
+# is_model_supported
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolIsModelSupported:
+
+    def test_returns_true_for_any_model(self, tmp_path):
+        tool = make_tool(tmp_path)
+        for model in ("gpt-4", "claude-3-sonnet", "ollama/llama3", ""):
+            assert tool.is_model_supported(model) is True
+
+
 # ---------------------------------------------------------------------------
 # invoke — full command dispatch
 # ---------------------------------------------------------------------------
@@ -444,6 +485,69 @@ def test_invoke_handles_bad_quoting_gracefully(self, tmp_path):
         assert result.return_code != 0
         assert "Parse error" in result.stderr
 
+    def test_invoke_str_replace_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+
+        result = tool.invoke(
+            'memory str_replace /memories/f.md --old "hello" --new "goodbye"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
+    def test_invoke_insert_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+
+        result = tool.invoke(
+            'memory insert /memories/f.md --line 0 --text "prepended"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
+    def test_invoke_delete_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        f = tool._memory_dir / "f.md"
+        f.write_text("data")
+
+        result = tool.invoke("memory delete /memories/f.md", parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert not f.exists()
+
+    def test_invoke_rename_subcommand(self, tmp_path):
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "old.md").write_text("content")
+
+        result = tool.invoke(
+            "memory rename /memories/old.md /memories/new.md",
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 0
+        assert (tool._memory_dir / "new.md").read_text() == "content"
+        assert not (tool._memory_dir / "old.md").exists()
+
+    def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path):
+        """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand
+        are caught and returned as a CmdReturn with return_code=1."""
+        tool = make_tool(tmp_path)
+
+        # str_replace on a non-existent file raises FileNotFoundError
+        result = tool.invoke(
+            'memory str_replace /memories/missing.md --old "x" --new "y"',
+            parent_bot=Mock(),
+        )
+
+        assert result.return_code == 1
+        assert result.stdout == ""
+        assert result.stderr != ""
+
 
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])

From b4d753e938ac4861970427068253df19009f8969 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 5 Mar 2026 09:23:30 +0000
Subject: [PATCH 07/20] Add tests for coverage

---
 .../test_anthropic_memory_tool.py             | 15 +++++--
 .../tool_definitions/test_memory_tool.py      | 40 ++++++++++++++++---
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index 6668bfc..db7069e 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -209,13 +209,20 @@ def test_create_writes_file(self, tmp_path):
         assert (tool._memory_dir / "new.md").read_text() == "hello world"
 
     def test_create_raises_runtime_error_on_failure(self, tmp_path):
+        """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by
+        mocking _create to return a non-zero CmdReturn."""
+        from unittest.mock import patch
+        from microbots.environment.Environment import CmdReturn
+
         tool = make_tool(tmp_path)
-        # Path traversal should cause _create to fail via _resolve
         cmd = BetaMemoryTool20250818CreateCommand(
-            command="create", path="/memories/../../etc/evil.md", file_text="x"
+            command="create", path="/memories/new.md", file_text="x"
         )
-        with pytest.raises((RuntimeError, ValueError)):
-            tool.create(cmd)
+        with patch.object(
+            tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1)
+        ):
+            with pytest.raises(RuntimeError, match="disk full"):
+                tool.create(cmd)
 
 
 # ---------------------------------------------------------------------------
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 5679d5e..089ca52 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -166,6 +166,14 @@ def test_view_no_args_returns_error(self, tmp_path):
         result = tool._view([])
         assert result.return_code != 0
 
+    def test_view_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello\n")
+        result = tool._view(["/memories/f.md", "--bogus", "value"])
+        assert result.return_code == 0
+        assert "hello" in result.stdout
+
 
 # ---------------------------------------------------------------------------
 # _create
@@ -257,6 +265,16 @@ def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
         result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
         assert result.return_code != 0
 
+    def test_str_replace_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello world")
+        result = tool._str_replace([
+            "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye"
+        ])
+        assert result.return_code == 0
+        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+
 
 # ---------------------------------------------------------------------------
 # _insert
@@ -312,6 +330,17 @@ def test_insert_empty_args_returns_usage_error(self, tmp_path):
         assert result.return_code == 1
         assert "Usage: memory insert" in result.stderr
 
+    def test_insert_unknown_flag_is_skipped(self, tmp_path):
+        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
+        result = tool._insert([
+            "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended"
+        ])
+        assert result.return_code == 0
+        lines = (tool._memory_dir / "f.md").read_text().splitlines()
+        assert lines[0] == "prepended"
+
 
 # ---------------------------------------------------------------------------
 # _delete
@@ -534,19 +563,20 @@ def test_invoke_rename_subcommand(self, tmp_path):
         assert not (tool._memory_dir / "old.md").exists()
 
     def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path):
-        """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand
-        are caught and returned as a CmdReturn with return_code=1."""
+        """except (ValueError, FileNotFoundError, RuntimeError) block:
+        a path-traversal path causes _resolve() to raise ValueError inside a
+        subcommand handler, which is caught and returned as CmdReturn(return_code=1)."""
         tool = make_tool(tmp_path)
 
-        # str_replace on a non-existent file raises FileNotFoundError
+        # Path traversal triggers ValueError inside _view → caught by except block
         result = tool.invoke(
-            'memory str_replace /memories/missing.md --old "x" --new "y"',
+            "memory view /memories/../../etc/passwd",
             parent_bot=Mock(),
         )
 
         assert result.return_code == 1
         assert result.stdout == ""
-        assert result.stderr != ""
+        assert "traversal" in result.stderr.lower() or result.stderr != ""
 
 
 if __name__ == "__main__":

From 579e6a349355077676ea59211fe7ffc2f7ad6dcd Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Tue, 10 Mar 2026 11:54:18 +0000
Subject: [PATCH 08/20] move tool dispatch out of AnthropicApi into MicroBot
 run loop

---
 pytest.ini                                    |   1 +
 src/microbots/MicroBot.py                     |  39 +-
 src/microbots/llm/anthropic_api.py            | 117 +++--
 src/microbots/llm/llm.py                      |   9 +
 .../tool_definitions/anthropic_memory_tool.py |  37 +-
 test/bot/test_memory_tool_integration.py      | 439 ++++++++++++++++++
 test/bot/test_upgrade_tools_for_provider.py   | 110 ++---
 test/llm/test_anthropic_api.py                | 289 +++++-------
 .../test_anthropic_memory_tool.py             |  14 +-
 9 files changed, 710 insertions(+), 345 deletions(-)
 create mode 100644 test/bot/test_memory_tool_integration.py

diff --git a/pytest.ini b/pytest.ini
index d0e4f1f..80758ec 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -11,5 +11,6 @@ addopts =
 markers =
     unit: Unit tests
     integration: Integration tests
+    anthropic_integration: Integration tests requiring a real Anthropic API key
     slow: Slow tests
     docker: marks tests that require a running Docker daemon and pull container images
diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py
index 7b061d7..f2708b1 100644
--- a/src/microbots/MicroBot.py
+++ b/src/microbots/MicroBot.py
@@ -317,38 +317,7 @@ def _create_environment(self, folder_to_mount: Optional[Mount]):
             folder_to_mount=folder_to_mount,
         )
 
-    def _upgrade_tools_for_provider(self):
-        """Auto-upgrade provider-agnostic tools to their provider-optimised variants.
-
-        Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when
-        the provider is Anthropic so the model gets native structured tool-use
-        instead of the text-command loop.  The ``memory_dir`` and any custom
-        ``usage_instructions_to_llm`` are forwarded to the upgraded instance.
-        """
-        if self.model_provider != ModelProvider.ANTHROPIC:
-            return
-
-        # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths
-        from microbots.tools.tool_definitions.memory_tool import MemoryTool
-        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
-
-        upgraded = []
-        for tool in self.additional_tools:
-            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
-                logger.info(
-                    "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider"
-                )
-                upgraded.append(AnthropicMemoryTool(
-                    memory_dir=tool.memory_dir,
-                    usage_instructions=tool.usage_instructions_to_llm,
-                ))
-            else:
-                upgraded.append(tool)
-        self.additional_tools = upgraded
-
     def _create_llm(self):
-        self._upgrade_tools_for_provider()
-
         # Append tool usage instructions to system prompt
         system_prompt_with_tools = self.system_prompt if self.system_prompt else ""
         if self.additional_tools:
@@ -365,16 +334,10 @@ def _create_llm(self):
                 system_prompt=system_prompt_with_tools, model_name=self.deployment_name
             )
         elif self.model_provider == ModelProvider.ANTHROPIC:
-            # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing:
-            # any tool that exposes both to_dict() and call() is a native Anthropic tool.
-            native_tools = [
-                t for t in self.additional_tools
-                if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None))
-            ]
             self.llm = AnthropicApi(
                 system_prompt=system_prompt_with_tools,
                 deployment_name=self.deployment_name,
-                native_tools=native_tools or None,
+                additional_tools=self.additional_tools,
             )
         # No Else case required as model provider is already validated using _validate_model_and_provider
 
diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index 4a73ba5..a403f9c 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -21,12 +21,31 @@
 
 class AnthropicApi(LLMInterface):
 
+    def upgrade_tools(self, tools: list) -> list:
+        """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use."""
+        from microbots.tools.tool_definitions.memory_tool import MemoryTool
+        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+        upgraded = []
+        for tool in tools:
+            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
+                logger.info(
+                    "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider"
+                )
+                upgraded.append(AnthropicMemoryTool(
+                    memory_dir=tool.memory_dir,
+                    usage_instructions=tool.usage_instructions_to_llm,
+                ))
+            else:
+                upgraded.append(tool)
+        return upgraded
+
     def __init__(
         self,
         system_prompt: str,
         deployment_name: str = deployment_name,
         max_retries: int = 3,
-        native_tools: Optional[List] = None,
+        additional_tools: Optional[List] = None,
     ):
         """
         Parameters
@@ -37,11 +56,10 @@ def __init__(
             The Anthropic model deployment name.
         max_retries : int
             Maximum number of retries for invalid LLM responses.
-        native_tools : Optional[List]
-            Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that
-            have both a ``to_dict()`` and a ``call()`` method.  These are passed
-            directly to the API and their tool-use blocks are dispatched here
-            before the JSON response is returned to the caller.
+        additional_tools : Optional[List]
+            Tool objects passed from MicroBot.  Any provider-agnostic tools
+            (e.g. ``MemoryTool``) are silently upgraded to their Anthropic-
+            native variants, and their API schemas are extracted.
         """
         self.ai_client = Anthropic(
             api_key=api_key,
@@ -50,12 +68,18 @@ def __init__(
         self.deployment_name = deployment_name
         self.system_prompt = system_prompt
         self.messages = []
-        self.native_tools = native_tools or []
-        # Cache tool dicts once so _call_api and _dispatch_tool_use don't
-        # re-serialise on every invocation (important when multiple native
-        # tools are registered, e.g. memory + bash).
-        self._native_tool_dicts = [t.to_dict() for t in self.native_tools]
-        self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)}
+
+        # Silently upgrade tools in-place and extract API schemas
+        tools = additional_tools or []
+        upgraded = self.upgrade_tools(tools)
+        # Mutate the original list so the caller (MicroBot) sees upgraded tools
+        if additional_tools is not None:
+            additional_tools[:] = upgraded
+        self._tool_dicts = [
+            t.to_dict() for t in upgraded
+            if callable(getattr(t, "to_dict", None))
+        ]
+        self._pending_tool_response = None
 
         # Set these values here. This logic will be handled in the parent class.
         self.max_retries = max_retries
@@ -66,7 +90,7 @@ def __init__(
     # ---------------------------------------------------------------------- #
 
     def _call_api(self) -> object:
-        """Call the Anthropic messages API, including native tools when present."""
+        """Call the Anthropic messages API, including tool definitions when present."""
         kwargs = dict(
             model=self.deployment_name,
             system=self.system_prompt,
@@ -74,44 +98,24 @@ def _call_api(self) -> object:
             max_tokens=4096,
         )
 
-        if self.native_tools:
-            kwargs["tools"] = self._native_tool_dicts
+        if self._tool_dicts:
+            kwargs["tools"] = self._tool_dicts
 
         return self.ai_client.messages.create(**kwargs)
 
-    def _dispatch_tool_use(self, response) -> None:
-        """Handle a tool_use response: execute each tool call and append results.
+    def _append_tool_result(self, response, result_text: str) -> None:
+        """Append the assistant tool_use turn and the corresponding tool_result user turn.
 
-        Mutates ``self.messages`` in place — appends the assistant turn (with
-        all content blocks) and the corresponding tool_result user turn.
+        Called when the caller provides the tool execution result via
+        the next ``ask()`` call.
         """
-        # Append the full assistant message as-is (content is a list of blocks)
         assistant_content = [block.model_dump() for block in response.content]
         self.messages.append({"role": "assistant", "content": assistant_content})
 
-        # Build tool_result entries for every tool_use block
         tool_results = []
         for block in response.content:
             if block.type != "tool_use":
                 continue
-
-            # Find the matching native tool by name
-            tool = self._native_tools_by_name.get(block.name)
-            if tool is None:
-                result_text = f"Error: unknown tool '{block.name}'"
-                logger.error("Received tool_use for unknown tool: %s", block.name)
-            else:
-                try:
-                    result_text = tool.call(block.input)
-                    logger.info(
-                        "🧠 Native tool '%s' executed. Result (first 200 chars): %s",
-                        block.name,
-                        str(result_text)[:200],
-                    )
-                except Exception as exc:
-                    result_text = f"Error executing tool '{block.name}': {exc}"
-                    logger.error("Native tool '%s' raised: %s", block.name, exc)
-
             tool_results.append({
                 "type": "tool_result",
                 "tool_use_id": block.id,
@@ -127,18 +131,39 @@ def _dispatch_tool_use(self, response) -> None:
     def ask(self, message: str) -> LLMAskResponse:
         self.retries = 0  # reset retries for each ask. Handled in parent class.
 
-        self.messages.append({"role": "user", "content": message})
+        if self._pending_tool_response:
+            # Previous response was tool_use — format this message as tool results.
+            self._append_tool_result(self._pending_tool_response, message)
+            self._pending_tool_response = None
+        else:
+            self.messages.append({"role": "user", "content": message})
 
         valid = False
         while not valid:
             response = self._call_api()
 
-            # Dispatch any tool_use rounds before looking for a JSON response.
-            # The model may call the memory tool multiple times before producing
-            # its final JSON command.
-            while response.stop_reason == "tool_use":
-                self._dispatch_tool_use(response)
-                response = self._call_api()
+            if response.stop_reason == "tool_use":
+                # Return tool call info as an LLMAskResponse so the
+                # caller (MicroBot.run) can dispatch the tool.
+                self._pending_tool_response = response
+
+                thoughts = ""
+                for block in response.content:
+                    if block.type == "text":
+                        thoughts = block.text
+                        break
+
+                tool_calls = []
+                for block in response.content:
+                    if block.type == "tool_use":
+                        tool_calls.append({
+                            "name": block.name,
+                            "id": block.id,
+                            "input": block.input,
+                        })
+
+                command = json.dumps({"native_tool_calls": tool_calls})
+                return LLMAskResponse(task_done=False, thoughts=thoughts, command=command)
 
             # Extract text content from the final response
             response_text = ""
diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py
index 2800790..e7e5c22 100644
--- a/src/microbots/llm/llm.py
+++ b/src/microbots/llm/llm.py
@@ -29,6 +29,15 @@ def ask(self, message: str) -> LLMAskResponse:
     def clear_history(self) -> bool:
         pass
 
+    def upgrade_tools(self, tools: list) -> list:
+        """Upgrade tools for the specific LLM provider.
+
+        The default implementation is a no-op.  Subclasses (e.g.
+        ``AnthropicApi``) override this to swap provider-agnostic tools
+        with their native equivalents.
+        """
+        return tools
+
     def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
 
         if self.retries >= self.max_retries:
diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
index 9cf547e..ef26182 100644
--- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
+++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
@@ -21,8 +21,9 @@
 The memory tool (type ``memory_20250818``) is available in the standard
 Anthropic library and does not require a beta endpoint or header.  Pass it
 via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular
-``client.messages.create(...)`` call.  ``AnthropicApi`` handles this
-automatically when ``native_tools`` contains an ``AnthropicMemoryTool``.
+``client.messages.create(...)`` call.  ``MicroBot`` auto-upgrades
+``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and
+passes the tool schema to ``AnthropicApi`` via ``tool_dicts``.
 
 Usage:
     from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
@@ -33,6 +34,7 @@
 
 from __future__ import annotations
 
+import json
 from logging import getLogger
 from pathlib import Path
 
@@ -48,6 +50,7 @@
     BetaMemoryTool20250818ViewCommand,
 )
 
+from microbots.environment.Environment import CmdReturn
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 
 logger = getLogger(__name__)
@@ -116,10 +119,36 @@ def is_model_supported(self, model_name: str) -> bool:
         return "claude" in model_name.lower()
 
     def is_invoked(self, command: str) -> bool:
-        """Return False — this tool is dispatched natively by AnthropicApi,
-        not via the shell command loop."""
+        """Return True when the command is a serialized native_tool_calls JSON
+        containing a call to the ``memory`` tool."""
+        try:
+            data = json.loads(command)
+            if "native_tool_calls" in data:
+                return any(tc["name"] == "memory" for tc in data["native_tool_calls"])
+        except (json.JSONDecodeError, KeyError, TypeError):
+            pass
         return False
 
+    def invoke(self, command: str, parent_bot) -> CmdReturn:
+        """Execute all memory tool calls in the serialized native_tool_calls batch."""
+        data = json.loads(command)
+        results = []
+        for tc in data["native_tool_calls"]:
+            if tc["name"] != "memory":
+                continue
+            try:
+                result = self.call(tc["input"])
+                logger.info(
+                    "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s",
+                    str(result)[:200],
+                )
+                results.append(str(result))
+            except Exception as exc:
+                logger.error("Native tool 'memory' raised: %s", exc)
+                results.append(f"Error executing tool 'memory': {exc}")
+        combined = "\n".join(results)
+        return CmdReturn(stdout=combined, stderr="", return_code=0)
+
     def clear_all(self) -> None:
         """Delete all memory files (useful for testing or resetting state)."""
         self._clear()
diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
new file mode 100644
index 0000000..25c717d
--- /dev/null
+++ b/test/bot/test_memory_tool_integration.py
@@ -0,0 +1,439 @@
+"""Tests for the Anthropic memory tool end-to-end flow.
+
+Unit tests (mocked API):
+  Verify wiring — auto-upgrade, tool dispatch, and memory file operations
+  with a mocked Anthropic client. Fast, free, no API key needed.
+
+Integration tests (real API):
+  Hit the actual Anthropic API to verify the full round-trip.
+  Gated behind ``@pytest.mark.anthropic_integration``.
+  Require ``ANTHROPIC_API_KEY`` in .env.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+from dotenv import load_dotenv
+
+load_dotenv()
+
+sys.path.insert(
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))
+)
+
+from microbots import MicroBot, BotRunResult
+from microbots.llm.llm import llm_output_format_str
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""):
+    """Build a mock Anthropic API response with stop_reason='tool_use'."""
+    blocks = []
+
+    if thinking_text:
+        text_block = Mock()
+        text_block.type = "text"
+        text_block.text = thinking_text
+        blocks.append(text_block)
+
+    tool_block = Mock()
+    tool_block.type = "tool_use"
+    tool_block.name = tool_name
+    tool_block.id = tool_id
+    tool_block.input = tool_input
+    tool_block.model_dump = Mock(return_value={
+        "type": "tool_use",
+        "id": tool_id,
+        "name": tool_name,
+        "input": tool_input,
+    })
+    blocks.append(tool_block)
+
+    resp = Mock()
+    resp.stop_reason = "tool_use"
+    resp.content = blocks
+    return resp
+
+
+def _make_end_turn_response(task_done, thoughts, command=""):
+    """Build a mock Anthropic API response with stop_reason='end_turn'."""
+    payload = json.dumps({
+        "task_done": task_done,
+        "thoughts": thoughts,
+        "command": command,
+    })
+
+    text_block = Mock()
+    text_block.type = "text"
+    text_block.text = payload
+
+    resp = Mock()
+    resp.stop_reason = "end_turn"
+    resp.content = [text_block]
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolWiring:
+    """Unit tests — mocked Anthropic client, real tool dispatch and file ops."""
+
+    @pytest.fixture()
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+
+    @pytest.fixture()
+    def bot(self, memory_dir):
+        """Create a MicroBot with Anthropic provider and a MemoryTool.
+
+        The Anthropic client is mocked, but the rest of the stack is real:
+        auto-upgrade, tool dispatch, and memory file operations.
+        """
+        tool = MemoryTool(
+            memory_dir=str(memory_dir),
+            usage_instructions_to_llm="Use the memory tool to persist notes.",
+        )
+
+        mock_env = Mock()
+        mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="")
+
+        anthropic_deployment = "claude-sonnet-4-5"
+
+        with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \
+             patch("microbots.llm.anthropic_api.api_key", "test-key"), \
+             patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \
+             patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment):
+
+            bot = MicroBot(
+                model=f"anthropic/{anthropic_deployment}",
+                system_prompt="You are a helpful assistant.",
+                additional_tools=[tool],
+                environment=mock_env,
+            )
+
+            self._mock_client = mock_anthropic_cls.return_value
+            yield bot
+            del bot
+
+    # -- Upgrade verification -----------------------------------------------
+
+    def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot):
+        """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool."""
+        upgraded_tools = bot.additional_tools
+        memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+
+    def test_tool_dicts_include_memory_schema(self, bot):
+        """The LLM should have received the memory tool schema."""
+        assert len(bot.llm._tool_dicts) == 1
+        assert bot.llm._tool_dicts[0]["type"] == "memory_20250818"
+
+    # -- Create file via tool_use -------------------------------------------
+
+    def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory create → MicroBot dispatches → file appears on disk."""
+        # Sequence:
+        # 1. ask(task) → API returns tool_use (memory create)
+        # 2. ask(tool_result) → API returns end_turn (task_done=True)
+        self._mock_client.messages.create.side_effect = [
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_001",
+                tool_input={
+                    "command": "create",
+                    "path": "/memories/notes.md",
+                    "file_text": "Hello from integration test",
+                },
+                thinking_text="I'll save a note to memory.",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Saved a note to memory successfully.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Save a note saying 'Hello from integration test'",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        assert result.error is None
+
+        # Verify the file was actually created on disk
+        # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md
+        created_file = memory_dir / "notes.md"
+        assert created_file.exists(), f"Expected {created_file} to be created"
+        assert created_file.read_text() == "Hello from integration test"
+
+    # -- View file via tool_use ---------------------------------------------
+
+    def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory view → MicroBot dispatches → file content returned."""
+        # Pre-create a file in memory
+        # _resolve("/memories/existing.md") → memory_dir/existing.md
+        (memory_dir / "existing.md").write_text("Previously saved content")
+
+        self._mock_client.messages.create.side_effect = [
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_002",
+                tool_input={
+                    "command": "view",
+                    "path": "/memories/existing.md",
+                },
+                thinking_text="Let me check my memory.",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Found previously saved content in memory.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Check your memory for existing notes",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+
+        # Verify the view result was passed back to the API as tool_result
+        calls = self._mock_client.messages.create.call_args_list
+        assert len(calls) == 2
+        # The second call should have messages including the tool_result
+        second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", [])
+        tool_result_msgs = [
+            m for m in second_call_messages
+            if m.get("role") == "user" and isinstance(m.get("content"), list)
+            and any(c.get("type") == "tool_result" for c in m["content"])
+        ]
+        assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call"
+        # The tool_result content should contain the file content
+        tool_result_content = tool_result_msgs[-1]["content"][0]["content"]
+        assert "Previously saved content" in tool_result_content
+
+    # -- Multiple tool calls in sequence ------------------------------------
+
+    def test_create_then_view_memory_file(self, bot, memory_dir):
+        """LLM creates a file, then views it — both dispatched via MicroBot loop."""
+        self._mock_client.messages.create.side_effect = [
+            # Step 1: create file
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_003",
+                tool_input={
+                    "command": "create",
+                    "path": "/memories/todo.md",
+                    "file_text": "- Fix bug #42\n- Write tests",
+                },
+                thinking_text="Creating a todo list.",
+            ),
+            # Step 2: view file
+            _make_tool_use_response(
+                tool_name="memory",
+                tool_id="tool_004",
+                tool_input={
+                    "command": "view",
+                    "path": "/memories/todo.md",
+                },
+                thinking_text="Let me verify what I wrote.",
+            ),
+            # Step 3: done
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Created and verified the todo list.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Create a todo list and verify it was saved",
+            max_iterations=10,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        assert result.error is None
+
+        # File should exist with correct content
+        created_file = memory_dir / "todo.md"
+        assert created_file.exists()
+        assert "Fix bug #42" in created_file.read_text()
+
+    # -- Non-memory commands still go to environment ------------------------
+
+    def test_non_memory_commands_go_to_environment(self, bot):
+        """Regular shell commands should be dispatched to the environment, not the memory tool."""
+        self._mock_client.messages.create.side_effect = [
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="Let me check the files.",
+                command="ls -la",
+            ),
+            _make_end_turn_response(
+                task_done=True,
+                thoughts="Done.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "List the files",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        # The environment.execute should have been called with "ls -la"
+        bot.environment.execute.assert_called_with("ls -la")
+
+
+# ---------------------------------------------------------------------------
+# Real integration tests — require ANTHROPIC_API_KEY
+# ---------------------------------------------------------------------------
+
+MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool.
+You can save and retrieve notes using the memory tool.
+All your responses must be in this JSON format:
+{llm_output_format_str}
+The properties (task_done, thoughts, command) are mandatory on each response.
+When you are done, set task_done to true and command to an empty string.
+"""
+
+
+@pytest.mark.anthropic_integration
+@pytest.mark.docker
+class TestMemoryToolRealApi:
+    """End-to-end integration tests that hit the real Anthropic API.
+
+    These tests exercise the full MicroBot → AnthropicApi → memory tool
+    pipeline with no mocking.  A real Docker environment is created
+    (matching the AgentBoss integration test pattern).
+
+    Run with::
+
+        pytest -m anthropic_integration
+
+    Requires ``ANTHROPIC_API_KEY`` in ``.env``.
+    """
+
+    @pytest.fixture()
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+
+    @pytest.fixture()
+    def memory_bot(self, memory_dir):
+        """Create a MicroBot with the real Anthropic API, real Docker env,
+        and a MemoryTool.  No mocking — fully end-to-end.
+        """
+        tool = MemoryTool(
+            memory_dir=str(memory_dir),
+            usage_instructions_to_llm="Use the memory tool to persist notes.",
+        )
+
+        anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5")
+
+        bot = MicroBot(
+            model=f"anthropic/{anthropic_deployment}",
+            system_prompt=MEMORY_SYSTEM_PROMPT,
+            additional_tools=[tool],
+        )
+
+        yield bot
+        del bot
+
+    def test_memory_tool_auto_upgraded(self, memory_bot):
+        """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool."""
+        memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+
+    def test_create_memory_file(self, memory_bot, memory_dir):
+        """MicroBot should persist a debugging plan to memory.
+
+        The LLM is expected to:
+          1. Receive a task about planning a debugging session.
+          2. Decide to persist the plan using the memory tool.
+          3. Confirm the task is done.
+
+        We verify the plan was actually written to disk.
+        """
+        result: BotRunResult = memory_bot.run(
+            task=(
+                "You are investigating a bug where the server returns HTTP 500 "
+                "on POST /api/users. Create a debugging plan that includes: "
+                "1) check server logs, 2) reproduce the request with curl, "
+                "3) inspect the database connection. "
+                "Persist this plan so you can resume later if interrupted."
+            ),
+            max_iterations=10,
+            timeout_in_seconds=60,
+        )
+
+        assert result.status is True, f"Task failed: {result.error}"
+        assert result.error is None
+
+        # The LLM should have used the memory tool to persist the plan
+        saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
+        assert len(saved_files) >= 1, (
+            f"Expected at least one file created in memory. "
+            f"Found: {saved_files}"
+        )
+        combined_content = "\n".join(f.read_text() for f in saved_files).lower()
+        assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, (
+            f"Expected debugging plan content in memory files. Content: {combined_content}"
+        )
+
+    def test_create_and_view_roundtrip(self, memory_bot, memory_dir):
+        """MicroBot should save findings and then review them before reporting.
+
+        The LLM is expected to:
+          1. Record analysis findings using the memory tool.
+          2. Review what it recorded to verify nothing was missed.
+          3. Summarize the findings in its final thoughts.
+
+        We verify:
+          - At least one file was written to disk.
+          - The LLM's summary references the recorded findings.
+        """
+        result: BotRunResult = memory_bot.run(
+            task=(
+                "You analyzed a Python project and found these issues: "
+                "1) an unused import 'os' in utils.py, "
+                "2) a missing null check in handler.py line 42. "
+                "Record these findings, then review your notes and "
+                "summarize what you found in your final thoughts."
+            ),
+            max_iterations=15,
+            timeout_in_seconds=60,
+        )
+
+        assert result.status is True, f"Task failed: {result.error}"
+        assert result.error is None
+
+        # The LLM should have created at least one memory file
+        saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
+        assert len(saved_files) >= 1, (
+            f"Expected at least one file in memory. "
+            f"Found: {list(memory_dir.rglob('*'))}"
+        )
+
+        result_lower = result.result.lower()
+        assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, (
+            f"LLM should have summarized the findings. Got: {result.result}"
+        )
diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py
index f5d1e98..459afd4 100644
--- a/test/bot/test_upgrade_tools_for_provider.py
+++ b/test/bot/test_upgrade_tools_for_provider.py
@@ -1,13 +1,7 @@
-"""
-Unit tests for MicroBot._upgrade_tools_for_provider.
+"""Unit tests for AnthropicApi.upgrade_tools() method.
 
 These tests verify that plain ``MemoryTool`` instances are automatically
-replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic,
-and that no changes are made for other providers or other tool types.
-
-All tests bypass the heavy MicroBot constructor (Docker environment, LLM
-creation) by constructing an uninitialized instance with ``object.__new__``
-and manually setting only the attributes the method under test needs.
+replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``.
 """
 import sys
 import os
@@ -17,8 +11,7 @@
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
 
-from microbots.MicroBot import MicroBot
-from microbots.constants import ModelProvider
+from microbots.llm.anthropic_api import AnthropicApi
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
 
@@ -27,15 +20,6 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
-def _bare_microbot(model_provider: str, tools: list) -> MicroBot:
-    """Return an uninitialized MicroBot with only the attributes that
-    ``_upgrade_tools_for_provider`` inspects."""
-    bot = object.__new__(MicroBot)
-    bot.model_provider = model_provider
-    bot.additional_tools = list(tools)
-    return bot
-
-
 def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool:
     return MemoryTool(
         memory_dir=str(tmp_path / "memory"),
@@ -50,112 +34,84 @@ def _memory_tool(tmp_path, instructions: str = "default instructions") -> Memory
 @pytest.mark.unit
 class TestUpgradeToolsForProvider:
 
-    # -- Anthropic provider: MemoryTool → AnthropicMemoryTool ---------------
+    @pytest.fixture(autouse=True)
+    def _create_api(self):
+        with patch("microbots.llm.anthropic_api.Anthropic"):
+            self.api = AnthropicApi(system_prompt="test")
+
+    # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool --------
 
     def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path):
         tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        assert len(bot.additional_tools) == 1
-        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
+        assert len(upgraded) == 1
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
 
     def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path):
         mem_dir = str(tmp_path / "my_memory")
         tool = MemoryTool(memory_dir=mem_dir)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        upgraded = bot.additional_tools[0]
-        assert isinstance(upgraded, AnthropicMemoryTool)
-        assert str(upgraded.memory_dir) == mem_dir
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
+        assert str(upgraded[0].memory_dir) == mem_dir
 
     def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path):
         custom_instructions = "custom memory instructions for test"
         tool = _memory_tool(tmp_path, instructions=custom_instructions)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([tool])
 
-        upgraded = bot.additional_tools[0]
-        assert upgraded.usage_instructions_to_llm == custom_instructions
+        assert upgraded[0].usage_instructions_to_llm == custom_instructions
 
     def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path):
         existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([existing])
 
-        assert len(bot.additional_tools) == 1
-        assert bot.additional_tools[0] is existing
+        assert len(upgraded) == 1
+        assert upgraded[0] is existing
 
     def test_non_memory_tools_are_kept_unchanged(self, tmp_path):
         other_tool = Mock()
         other_tool.__class__ = Mock  # not a MemoryTool subclass
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([other_tool])
 
-        assert len(bot.additional_tools) == 1
-        assert bot.additional_tools[0] is other_tool
+        assert len(upgraded) == 1
+        assert upgraded[0] is other_tool
 
     def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path):
         plain_memory = _memory_tool(tmp_path)
         already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2"))
         other_tool = Mock(spec=[])
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool])
 
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool])
 
-        assert len(bot.additional_tools) == 3
+        assert len(upgraded) == 3
         # first: should have been upgraded
-        assert isinstance(bot.additional_tools[0], AnthropicMemoryTool)
-        assert bot.additional_tools[0] is not plain_memory
+        assert isinstance(upgraded[0], AnthropicMemoryTool)
+        assert upgraded[0] is not plain_memory
         # second: already AnthropicMemoryTool, untouched
-        assert bot.additional_tools[1] is already_upgraded
+        assert upgraded[1] is already_upgraded
         # third: non-memory tool, untouched
-        assert bot.additional_tools[2] is other_tool
+        assert upgraded[2] is other_tool
 
     def test_empty_tool_list_is_a_no_op(self):
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [])
-
-        bot._upgrade_tools_for_provider()
+        upgraded = self.api.upgrade_tools([])
 
-        assert bot.additional_tools == []
+        assert upgraded == []
 
     def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog):
         tool1 = _memory_tool(tmp_path)
         tmp_path2 = tmp_path / "sub"
         tmp_path2.mkdir()
         tool2 = _memory_tool(tmp_path2)
-        bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2])
 
-        with caplog.at_level(logging.INFO, logger=" MicroBot "):
-            bot._upgrade_tools_for_provider()
+        with caplog.at_level(logging.INFO):
+            self.api.upgrade_tools([tool1, tool2])
 
         upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message]
         assert len(upgrade_logs) == 2
-
-    # -- Non-Anthropic providers: no upgrade should happen ------------------
-
-    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
-    def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider):
-        tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(provider, [tool])
-
-        bot._upgrade_tools_for_provider()
-
-        assert len(bot.additional_tools) == 1
-        assert isinstance(bot.additional_tools[0], MemoryTool)
-        assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool)
-
-    @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL])
-    def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider):
-        tool = _memory_tool(tmp_path)
-        bot = _bare_microbot(provider, [tool])
-
-        bot._upgrade_tools_for_provider()
-
-        assert bot.additional_tools[0] is tool
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 49674aa..2c11966 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -540,63 +540,37 @@ def test_anthropic_api_clear_history_integration(self):
 # ============================================================================
 
 @pytest.mark.unit
-class TestAnthropicApiNativeToolsInit:
-    """Tests for __init__ native_tools caching."""
+class TestAnthropicApiToolDictsInit:
+    """Tests for __init__ tool upgrade and tool_dicts extraction."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
-    def test_init_without_native_tools_has_empty_caches(self):
+    def test_init_without_additional_tools_has_empty_tool_dicts(self):
         api = AnthropicApi(system_prompt="test")
 
-        assert api.native_tools == []
-        assert api._native_tool_dicts == []
-        assert api._native_tools_by_name == {}
+        assert api._tool_dicts == []
 
-    def test_init_with_none_native_tools_has_empty_caches(self):
-        api = AnthropicApi(system_prompt="test", native_tools=None)
+    def test_init_with_none_additional_tools_has_empty_tool_dicts(self):
+        api = AnthropicApi(system_prompt="test", additional_tools=None)
 
-        assert api._native_tool_dicts == []
-        assert api._native_tools_by_name == {}
+        assert api._tool_dicts == []
 
-    def test_init_with_single_native_tool_caches_dict(self):
+    def test_init_with_tool_having_to_dict_extracts_dicts(self):
         tool = Mock()
         tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
+        # Ensure it's not a MemoryTool so upgrade_tools won't touch it
+        tool.__class__ = Mock
 
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+        api = AnthropicApi(system_prompt="test", additional_tools=[tool])
 
-        assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
+        assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
 
-    def test_init_with_single_native_tool_caches_by_name(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        assert "memory" in api._native_tools_by_name
-        assert api._native_tools_by_name["memory"] is tool
-
-    def test_init_with_multiple_native_tools_caches_all(self):
-        tool1 = Mock()
-        tool1.to_dict.return_value = {"name": "memory"}
-        tool2 = Mock()
-        tool2.to_dict.return_value = {"name": "bash"}
-
-        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
-
-        assert len(api._native_tool_dicts) == 2
-        assert api._native_tools_by_name["memory"] is tool1
-        assert api._native_tools_by_name["bash"] is tool2
-
-    def test_init_calls_to_dict_exactly_once_per_tool(self):
-        """to_dict() must not be called again on subsequent API calls."""
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-
-        AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_init_sets_pending_tool_response_to_none(self):
+        api = AnthropicApi(system_prompt="test")
 
-        assert tool.to_dict.call_count == 1
+        assert api._pending_tool_response is None
 
 
 @pytest.mark.unit
@@ -617,10 +591,10 @@ def test_call_api_without_tools_omits_tools_kwarg(self):
         call_kwargs = api.ai_client.messages.create.call_args[1]
         assert "tools" not in call_kwargs
 
-    def test_call_api_with_tools_passes_cached_dicts(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
+    def test_call_api_with_tool_dicts_passes_them(self):
+        dicts = [{"name": "memory", "type": "memory_20250818"}]
+        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
+        api._tool_dicts = dicts
         api.messages = [{"role": "user", "content": "hello"}]
         api.ai_client.messages.create = Mock(return_value=Mock())
 
@@ -628,35 +602,17 @@ def test_call_api_with_tools_passes_cached_dicts(self):
 
         call_kwargs = api.ai_client.messages.create.call_args[1]
         assert "tools" in call_kwargs
-        assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}]
-
-    def test_call_api_does_not_call_to_dict_again(self):
-        """to_dict() should only be called during __init__, never during _call_api."""
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool])
-        api.messages = [{"role": "user", "content": "hello"}]
-        api.ai_client.messages.create = Mock(return_value=Mock())
-
-        count_after_init = tool.to_dict.call_count  # should be 1
-        api._call_api()
-        api._call_api()
-
-        assert tool.to_dict.call_count == count_after_init  # no increase
+        assert call_kwargs["tools"] == dicts
 
 
 @pytest.mark.unit
-class TestAnthropicApiDispatchToolUse:
-    """Tests for _dispatch_tool_use."""
+class TestAnthropicApiAppendToolResult:
+    """Tests for _append_tool_result."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
-    # ------------------------------------------------------------------ #
-    # Helpers
-    # ------------------------------------------------------------------ #
-
     @staticmethod
     def _tool_use_block(name, tool_id="tu_001", input_data=None):
         block = Mock()
@@ -675,31 +631,21 @@ def _text_block(text="hello"):
         block.model_dump.return_value = {"type": "text", "text": text}
         return block
 
-    # ------------------------------------------------------------------ #
-    # Tests
-    # ------------------------------------------------------------------ #
-
-    def test_dispatch_appends_assistant_message_first(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_appends_assistant_message_first(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [self._tool_use_block("memory")]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "ok")
 
         assert api.messages[0]["role"] == "assistant"
 
-    def test_dispatch_appends_tool_result_user_message(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "file listing"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_appends_tool_result_user_message(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [self._tool_use_block("memory", tool_id="tu_abc")]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "file listing")
 
         user_msg = api.messages[1]
         assert user_msg["role"] == "user"
@@ -707,100 +653,53 @@ def test_dispatch_appends_tool_result_user_message(self):
         assert user_msg["content"][0]["tool_use_id"] == "tu_abc"
         assert user_msg["content"][0]["content"] == "file listing"
 
-    def test_dispatch_calls_tool_with_correct_input(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        input_data = {"command": "view", "path": "/memories"}
-        response = Mock()
-        response.content = [self._tool_use_block("memory", input_data=input_data)]
-        api._dispatch_tool_use(response)
-
-        tool.call.assert_called_once_with(input_data)
-
-    def test_dispatch_unknown_tool_returns_error_in_result(self):
-        api = AnthropicApi(system_prompt="test")  # no native tools
-
-        response = Mock()
-        response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")]
-        api._dispatch_tool_use(response)
-
-        content = api.messages[1]["content"][0]["content"]
-        assert "Error" in content
-        assert "unknown_tool" in content
-
-    def test_dispatch_tool_exception_returns_error_message(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.side_effect = RuntimeError("disk full")
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
-
-        response = Mock()
-        response.content = [self._tool_use_block("memory", tool_id="tu_exc")]
-        api._dispatch_tool_use(response)
-
-        content = api.messages[1]["content"][0]["content"]
-        assert "Error" in content
-        assert "disk full" in content
-
-    def test_dispatch_skips_non_tool_use_content_blocks(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_skips_non_tool_use_content_blocks(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [
             self._text_block("thinking..."),
             self._tool_use_block("memory", tool_id="tu_only"),
         ]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "result")
 
         tool_results = api.messages[1]["content"]
         assert len(tool_results) == 1
         assert tool_results[0]["tool_use_id"] == "tu_only"
 
-    def test_dispatch_handles_multiple_tool_use_blocks(self):
-        tool1 = Mock()
-        tool1.to_dict.return_value = {"name": "memory"}
-        tool1.call.return_value = "memory result"
-        tool2 = Mock()
-        tool2.to_dict.return_value = {"name": "bash"}
-        tool2.call.return_value = "bash result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2])
+    def test_handles_multiple_tool_use_blocks(self):
+        api = AnthropicApi(system_prompt="test")
 
         response = Mock()
         response.content = [
             self._tool_use_block("memory", tool_id="id_1"),
             self._tool_use_block("bash", tool_id="id_2"),
         ]
-        api._dispatch_tool_use(response)
+        api._append_tool_result(response, "combined result")
 
         results = api.messages[1]["content"]
         assert len(results) == 2
         assert results[0]["tool_use_id"] == "id_1"
-        assert results[0]["content"] == "memory result"
+        assert results[0]["content"] == "combined result"
         assert results[1]["tool_use_id"] == "id_2"
-        assert results[1]["content"] == "bash result"
+        assert results[1]["content"] == "combined result"
 
 
 @pytest.mark.unit
-class TestAnthropicApiAskWithToolUseLoop:
-    """Tests for ask() cycling through tool_use rounds before returning JSON."""
+class TestAnthropicApiAskWithToolUse:
+    """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results."""
 
     @pytest.fixture(autouse=True)
     def _use_patch(self, patch_anthropic_config):
         pass
 
     @staticmethod
-    def _tool_use_response(tool_name, tool_id):
+    def _tool_use_response(tool_name, tool_id, input_data=None):
         block = Mock()
         block.type = "tool_use"
         block.name = tool_name
         block.id = tool_id
-        block.input = {}
+        block.input = input_data or {}
         block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name}
         response = Mock()
         response.stop_reason = "tool_use"
@@ -818,40 +717,61 @@ def _text_response(json_dict):
         response.content = [block]
         return response
 
-    def test_ask_dispatches_one_tool_use_round_then_returns(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "viewed /memories"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_returns_tool_use_as_ask_response(self):
+        dicts = [{"name": "memory", "type": "memory_20250818"}]
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = dicts
 
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+        tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"})
+        api.ai_client.messages.create = Mock(return_value=tool_resp)
 
         result = api.ask("do the task")
 
-        assert api.ai_client.messages.create.call_count == 2
-        tool.call.assert_called_once()
-        assert result.command == "ls /"
+        assert result.task_done is False
+        assert '"native_tool_calls"' in result.command
+        parsed = json.loads(result.command)
+        assert parsed["native_tool_calls"][0]["name"] == "memory"
+        assert parsed["native_tool_calls"][0]["id"] == "tu_1"
+        assert api._pending_tool_response is tool_resp
 
-    def test_ask_dispatches_multiple_tool_use_rounds(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "ok"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_stores_pending_tool_response(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
 
-        tool_resp1 = self._tool_use_response("memory", "tu_1")
-        tool_resp2 = self._tool_use_response("memory", "tu_2")
-        final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp])
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        api.ai_client.messages.create = Mock(return_value=tool_resp)
 
-        result = api.ask("do the task")
+        api.ask("do it")
 
-        assert api.ai_client.messages.create.call_count == 3
-        assert tool.call.call_count == 2
-        assert result.task_done is True
+        assert api._pending_tool_response is tool_resp
 
-    def test_ask_without_tool_use_does_not_dispatch(self):
+    def test_ask_with_pending_tool_response_formats_tool_result(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
+
+        tool_resp = self._tool_use_response("memory", "tu_1")
+        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
+        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+
+        # First ask — returns tool_use
+        api.ask("do the task")
+
+        # Second ask — sends tool result, formats as tool_result
+        result = api.ask("viewed /memories")
+
+        assert result.command == "ls /"
+        assert api._pending_tool_response is None
+
+        # Check messages contain the tool_result
+        tool_result_msgs = [
+            m for m in api.messages
+            if m["role"] == "user" and isinstance(m["content"], list)
+        ]
+        assert len(tool_result_msgs) == 1
+        assert tool_result_msgs[0]["content"][0]["type"] == "tool_result"
+        assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1"
+
+    def test_ask_without_tool_use_works_normally(self):
         api = AnthropicApi(system_prompt="test")
 
         final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""})
@@ -861,23 +781,34 @@ def test_ask_without_tool_use_does_not_dispatch(self):
 
         assert api.ai_client.messages.create.call_count == 1
         assert result.command == "pwd"
+        assert api._pending_tool_response is None
 
-    def test_ask_tool_use_messages_are_added_to_history(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory"}
-        tool.call.return_value = "result"
-        api = AnthropicApi(system_prompt="test", native_tools=[tool])
+    def test_ask_extracts_thoughts_from_tool_use_response(self):
+        api = AnthropicApi(system_prompt="test")
+        api._tool_dicts = [{"name": "memory"}]
 
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
+        # Build a tool_use response with a text block for thoughts
+        text_block = Mock()
+        text_block.type = "text"
+        text_block.text = "Let me check memory first"
+        text_block.model_dump.return_value = {"type": "text", "text": text_block.text}
 
-        api.ask("do it")
+        tool_block = Mock()
+        tool_block.type = "tool_use"
+        tool_block.name = "memory"
+        tool_block.id = "tu_1"
+        tool_block.input = {}
+        tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"}
+
+        response = Mock()
+        response.stop_reason = "tool_use"
+        response.content = [text_block, tool_block]
+
+        api.ai_client.messages.create = Mock(return_value=response)
+
+        result = api.ask("do the task")
 
-        # Messages: user, assistant(tool_use), user(tool_result), assistant(final json)
-        roles = [m["role"] for m in api.messages]
-        assert roles.count("user") == 2
-        assert roles.count("assistant") == 2
+        assert result.thoughts == "Let me check memory first"
 
 
 if __name__ == "__main__":
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index db7069e..ab27a68 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -101,7 +101,19 @@ def test_returns_false_for_non_claude_models(self, tmp_path):
 @pytest.mark.unit
 class TestAnthropicMemoryToolIsInvoked:
 
-    def test_always_returns_false(self, tmp_path):
+    def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        import json
+        cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]})
+        assert tool.is_invoked(cmd) is True
+
+    def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        import json
+        cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]})
+        assert tool.is_invoked(cmd) is False
+
+    def test_returns_false_for_plain_commands(self, tmp_path):
         tool = make_tool(tmp_path)
         for cmd in ("memory view /memories", "memory clear", "anything", ""):
             assert tool.is_invoked(cmd) is False

From c7d2657a448580e706c7e9d4b203e4ae19ccb056 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 06:15:00 +0000
Subject: [PATCH 09/20]  enhance MemoryTool path validation and logging

---
 .../tools/tool_definitions/__init__.py        |  2 +
 .../tools/tool_definitions/memory_tool.py     | 17 ++++--
 test/llm/test_llm.py                          | 30 +++++++++-
 .../test_anthropic_memory_tool.py             | 55 +++++++++++++++++++
 4 files changed, 98 insertions(+), 6 deletions(-)
 create mode 100644 src/microbots/tools/tool_definitions/__init__.py

diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py
new file mode 100644
index 0000000..88acf0d
--- /dev/null
+++ b/src/microbots/tools/tool_definitions/__init__.py
@@ -0,0 +1,2 @@
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
+from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
\ No newline at end of file
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 310f880..b63f3ff 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import shlex
 import shutil
 from pathlib import Path
@@ -110,19 +111,22 @@ def is_model_supported(self, model_name: str) -> bool:
     def _resolve(self, path: str) -> Path:
         """Resolve a /memories/… path to an absolute host path."""
         stripped = path.lstrip("/")
+
+        # Reject any path containing '..' components before resolving
+        if ".." in Path(stripped).parts:
+            raise ValueError(f"Path traversal not allowed: {path!r}")
+
         if stripped == "memories":
             rel = ""
         elif stripped.startswith("memories/"):
             rel = stripped[len("memories/"):]
-        elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")):
-            raise ValueError(
-                f"Invalid memory path: {path!r}. Use paths under /memories/."
-            )
         else:
             rel = stripped  # treat as relative to memory_dir
 
         resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve()
-        if not str(resolved).startswith(str(self._memory_dir.resolve())):
+        # Use trailing separator to prevent prefix confusion with sibling dirs
+        memory_root = str(self._memory_dir.resolve())
+        if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep):
             raise ValueError(f"Path traversal not allowed: {path!r}")
         return resolved
 
@@ -184,6 +188,7 @@ def _view(self, args: list) -> CmdReturn:
             elif args[i] == "--end" and i + 1 < len(args):
                 end_line = int(args[i + 1]); i += 2
             else:
+                logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i])
                 i += 1
 
         resolved = self._resolve(path)
@@ -232,6 +237,7 @@ def _str_replace(self, args: list) -> CmdReturn:
             elif args[i] == "--new" and i + 1 < len(args):
                 new_text = args[i + 1]; i += 2
             else:
+                logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i])
                 i += 1
         if old_text is None or new_text is None:
             return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
@@ -259,6 +265,7 @@ def _insert(self, args: list) -> CmdReturn:
             elif args[i] == "--text" and i + 1 < len(args):
                 text = args[i + 1]; i += 2
             else:
+                logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i])
                 i += 1
         if line_num is None or text is None:
             return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py
index bd1b82d..af5a7d2 100644
--- a/test/llm/test_llm.py
+++ b/test/llm/test_llm.py
@@ -742,4 +742,32 @@ def test_task_done_true_with_not_none_command_field(self, llm):
         assert llm_response is None
         assert llm.retries == 1
         assert len(llm.messages) == 1
-        assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
\ No newline at end of file
+
+
+@pytest.mark.unit
+class TestUpgradeToolsDefault:
+    """Tests for the default LLMInterface.upgrade_tools no-op implementation."""
+
+    @pytest.fixture
+    def llm(self):
+        return ConcreteLLM()
+
+    def test_returns_same_list(self, llm):
+        """Default upgrade_tools returns the input list unchanged."""
+        tools = ["tool_a", "tool_b"]
+        result = llm.upgrade_tools(tools)
+        assert result is tools
+
+    def test_empty_list(self, llm):
+        """Default upgrade_tools handles an empty list."""
+        tools = []
+        result = llm.upgrade_tools(tools)
+        assert result == []
+
+    def test_preserves_tool_order_and_identity(self, llm):
+        """Default upgrade_tools does not reorder or copy elements."""
+        sentinel = object()
+        tools = [sentinel, "other"]
+        result = llm.upgrade_tools(tools)
+        assert result[0] is sentinel
+        assert result[1] == "other"
\ No newline at end of file
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
index ab27a68..81db12c 100644
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py
@@ -364,3 +364,58 @@ def test_rename_raises_runtime_error_on_failure(self, tmp_path):
         )
         with pytest.raises(RuntimeError):
             tool.rename(cmd)
+
+
+# ---------------------------------------------------------------------------
+# invoke — non-memory tool calls are skipped
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestAnthropicMemoryToolInvoke:
+
+    def test_invoke_skips_non_memory_tool_calls(self, tmp_path):
+        """The ``if tc["name"] != "memory": continue`` branch is exercised
+        when native_tool_calls contains a non-memory tool."""
+        import json
+        from unittest.mock import Mock
+
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "f.md").write_text("hello")
+
+        command = json.dumps({
+            "native_tool_calls": [
+                {"name": "bash", "id": "tu_1", "input": {"command": "ls"}},
+                {"name": "memory", "id": "tu_2", "input": {
+                    "command": "view", "path": "/memories/f.md", "view_range": None,
+                }},
+            ]
+        })
+
+        result = tool.invoke(command, parent_bot=Mock())
+
+        assert result.return_code == 0
+        # Only the memory call should produce output; bash should be skipped
+        assert "hello" in result.stdout
+
+    def test_invoke_catches_exception_from_tool_call(self, tmp_path):
+        """The ``except Exception`` branch is exercised when tool.call() raises."""
+        import json
+        from unittest.mock import Mock, patch
+
+        tool = make_tool(tmp_path)
+
+        command = json.dumps({
+            "native_tool_calls": [
+                {"name": "memory", "id": "tu_1", "input": {
+                    "command": "view", "path": "/memories/nonexistent.md", "view_range": None,
+                }},
+            ]
+        })
+
+        # Force call() to raise an exception
+        with patch.object(tool, "call", side_effect=RuntimeError("boom")):
+            result = tool.invoke(command, parent_bot=Mock())
+
+        assert result.return_code == 0
+        assert "Error executing tool 'memory'" in result.stdout
+        assert "boom" in result.stdout

From 3067b59e77f40197fc0c70ae101205a2dd13ec61 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 07:39:00 +0000
Subject: [PATCH 10/20]  enhance path validation

---
 src/microbots/tools/tool_definitions/memory_tool.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index b63f3ff..8131b8b 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -101,9 +101,6 @@ def __post_init__(self):
         self._memory_dir = base
         self._memory_dir.mkdir(parents=True, exist_ok=True)
 
-    def is_model_supported(self, model_name: str) -> bool:
-        return True
-
     # ---------------------------------------------------------------------- #
     # Path helpers
     # ---------------------------------------------------------------------- #
@@ -116,6 +113,11 @@ def _resolve(self, path: str) -> Path:
         if ".." in Path(stripped).parts:
             raise ValueError(f"Path traversal not allowed: {path!r}")
 
+        if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"):
+            raise ValueError(
+                f"Invalid memory path: {path!r}. Use paths under /memories/."
+            )
+
         if stripped == "memories":
             rel = ""
         elif stripped.startswith("memories/"):

From 1ec3251864619175fa1a274a52b5072382fb2be0 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 08:37:11 +0000
Subject: [PATCH 11/20] Add generic memory tool

---
 src/microbots/llm/anthropic_api.py            |  27 +-
 .../tools/tool_definitions/__init__.py        |   3 +-
 .../tool_definitions/anthropic_memory_tool.py | 213 ---------
 .../tools/tool_definitions/memory_tool.py     |   4 +-
 test/bot/test_memory_tool_integration.py      | 133 ++----
 test/bot/test_upgrade_tools_for_provider.py   | 117 -----
 .../test_anthropic_memory_tool.py             | 421 ------------------
 7 files changed, 46 insertions(+), 872 deletions(-)
 delete mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py
 delete mode 100644 test/bot/test_upgrade_tools_for_provider.py
 delete mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py

diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index a403f9c..2da9bab 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -21,25 +21,6 @@
 
 class AnthropicApi(LLMInterface):
 
-    def upgrade_tools(self, tools: list) -> list:
-        """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use."""
-        from microbots.tools.tool_definitions.memory_tool import MemoryTool
-        from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
-
-        upgraded = []
-        for tool in tools:
-            if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool):
-                logger.info(
-                    "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider"
-                )
-                upgraded.append(AnthropicMemoryTool(
-                    memory_dir=tool.memory_dir,
-                    usage_instructions=tool.usage_instructions_to_llm,
-                ))
-            else:
-                upgraded.append(tool)
-        return upgraded
-
     def __init__(
         self,
         system_prompt: str,
@@ -57,9 +38,8 @@ def __init__(
         max_retries : int
             Maximum number of retries for invalid LLM responses.
         additional_tools : Optional[List]
-            Tool objects passed from MicroBot.  Any provider-agnostic tools
-            (e.g. ``MemoryTool``) are silently upgraded to their Anthropic-
-            native variants, and their API schemas are extracted.
+            Tool objects passed from MicroBot. Any tools exposing a native
+            Anthropic schema via ``to_dict()`` are forwarded to the API.
         """
         self.ai_client = Anthropic(
             api_key=api_key,
@@ -69,10 +49,9 @@ def __init__(
         self.system_prompt = system_prompt
         self.messages = []
 
-        # Silently upgrade tools in-place and extract API schemas
+        # Preserve tool instances as provided and extract native Anthropic schemas.
         tools = additional_tools or []
         upgraded = self.upgrade_tools(tools)
-        # Mutate the original list so the caller (MicroBot) sees upgraded tools
         if additional_tools is not None:
             additional_tools[:] = upgraded
         self._tool_dicts = [
diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py
index 88acf0d..44e7592 100644
--- a/src/microbots/tools/tool_definitions/__init__.py
+++ b/src/microbots/tools/tool_definitions/__init__.py
@@ -1,2 +1 @@
-from microbots.tools.tool_definitions.memory_tool import MemoryTool
-from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
\ No newline at end of file
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
\ No newline at end of file
diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
deleted file mode 100644
index ef26182..0000000
--- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""
-AnthropicMemoryTool — wraps Anthropic's memory tool.
-
-The memory tool lets the model persist information across conversations by
-reading and writing files in a local memory directory.  When the model invokes
-the tool, it sends a command (view, create, str_replace, insert, delete,
-rename) and the client executes it against a local filesystem directory.
-
-This implementation extends both:
-  - ``MemoryTool``:  provides all file-operation logic (_resolve, _view,
-    _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies
-    the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.).
-  - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and
-    the ``to_dict()`` / ``call()`` interface required by AnthropicApi.
-
-The SDK command-handler overrides (view, create, str_replace, insert, delete,
-rename) simply translate SDK command objects → arg lists and delegate to the
-inherited MemoryTool private methods, converting the CmdReturn back to a
-string as the SDK expects.
-
-The memory tool (type ``memory_20250818``) is available in the standard
-Anthropic library and does not require a beta endpoint or header.  Pass it
-via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular
-``client.messages.create(...)`` call.  ``MicroBot`` auto-upgrades
-``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and
-passes the tool schema to ``AnthropicApi`` via ``tool_dicts``.
-
-Usage:
-    from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
-
-    memory = AnthropicMemoryTool()
-    bot = ReadingBot(..., additional_tools=[memory])
-"""
-
-from __future__ import annotations
-
-import json
-from logging import getLogger
-from pathlib import Path
-
-from typing_extensions import override
-
-from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool
-from anthropic.types.beta import (
-    BetaMemoryTool20250818CreateCommand,
-    BetaMemoryTool20250818DeleteCommand,
-    BetaMemoryTool20250818InsertCommand,
-    BetaMemoryTool20250818RenameCommand,
-    BetaMemoryTool20250818StrReplaceCommand,
-    BetaMemoryTool20250818ViewCommand,
-)
-
-from microbots.environment.Environment import CmdReturn
-from microbots.tools.tool_definitions.memory_tool import MemoryTool
-
-logger = getLogger(__name__)
-
-DEFAULT_MEMORY_INSTRUCTIONS = (
-    "MEMORY PROTOCOL:\n"
-    "1. ALWAYS view your memory directory BEFORE doing anything else "
-    "using the `view` command of your `memory` tool to check for earlier progress.\n"
-    "2. As you make progress on the task, record status, progress, "
-    "and key findings in your memory using the memory tool.\n"
-    "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, "
-    "so you risk losing any progress that is not recorded in your memory directory.\n"
-    "4. Before completing a task, always save your final results and analysis to memory.\n"
-    "5. When editing your memory folder, always keep its content up-to-date, coherent "
-    "and organized. Rename or delete files that are no longer relevant. "
-    "Do not create new files unless necessary.\n\n"
-    "IMPORTANT: The memory tool ONLY works with paths under /memories/. "
-    "Do NOT use the memory tool to access the repository or workdir. "
-    "Use shell commands (ls, cat, etc.) for filesystem access."
-)
-
-
-class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool):
-    """
-    Anthropic's built-in memory tool, backed by MemoryTool's file logic.
-
-    Inherits file-operation logic from ``MemoryTool`` (plain Python class) and
-    the SDK's native dispatch interface from ``BetaAbstractMemoryTool``.
-
-    The SDK command-handler overrides delegate to the inherited private methods
-    (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to
-    the ``args: list`` format that those methods expect, and converting the
-    returned ``CmdReturn`` to the string that the SDK API requires.
-
-    Parameters
-    ----------
-    memory_dir : str | Path | None
-        Root directory for memory files.  Defaults to ``~/.microbots/memory``.
-    usage_instructions : str | None
-        Custom instructions appended to the system prompt for the LLM.
-        Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``.
-    """
-
-    def __init__(
-        self,
-        memory_dir: str | Path | None = None,
-        usage_instructions: str | None = None,
-    ) -> None:
-        MemoryTool.__init__(
-            self,
-            memory_dir=str(memory_dir) if memory_dir else None,
-            usage_instructions_to_llm=(
-                usage_instructions
-                if usage_instructions is not None
-                else DEFAULT_MEMORY_INSTRUCTIONS
-            ),
-        )
-        _SDKMemoryTool.__init__(self)  # type: ignore[call-arg]
-
-    # ---------------------------------------------------------------------- #
-    # ToolAbstract overrides
-    # ---------------------------------------------------------------------- #
-
-    def is_model_supported(self, model_name: str) -> bool:
-        """Only Anthropic (Claude) models support the native memory tool."""
-        return "claude" in model_name.lower()
-
-    def is_invoked(self, command: str) -> bool:
-        """Return True when the command is a serialized native_tool_calls JSON
-        containing a call to the ``memory`` tool."""
-        try:
-            data = json.loads(command)
-            if "native_tool_calls" in data:
-                return any(tc["name"] == "memory" for tc in data["native_tool_calls"])
-        except (json.JSONDecodeError, KeyError, TypeError):
-            pass
-        return False
-
-    def invoke(self, command: str, parent_bot) -> CmdReturn:
-        """Execute all memory tool calls in the serialized native_tool_calls batch."""
-        data = json.loads(command)
-        results = []
-        for tc in data["native_tool_calls"]:
-            if tc["name"] != "memory":
-                continue
-            try:
-                result = self.call(tc["input"])
-                logger.info(
-                    "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s",
-                    str(result)[:200],
-                )
-                results.append(str(result))
-            except Exception as exc:
-                logger.error("Native tool 'memory' raised: %s", exc)
-                results.append(f"Error executing tool 'memory': {exc}")
-        combined = "\n".join(results)
-        return CmdReturn(stdout=combined, stderr="", return_code=0)
-
-    def clear_all(self) -> None:
-        """Delete all memory files (useful for testing or resetting state)."""
-        self._clear()
-        logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir)
-
-    # ---------------------------------------------------------------------- #
-    # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods
-    # ---------------------------------------------------------------------- #
-
-    @override
-    def clear_all_memory(self) -> str:
-        self.clear_all()
-        return "All memory cleared"
-
-    @override
-    def view(self, command: BetaMemoryTool20250818ViewCommand) -> str:
-        args = [command.path]
-        if command.view_range:
-            args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])]
-        result = self._view(args)
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return result.stdout
-
-    @override
-    def create(self, command: BetaMemoryTool20250818CreateCommand) -> str:
-        result = self._create([command.path, command.file_text])
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return f"File created successfully at {command.path}"
-
-    @override
-    def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str:
-        result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str])
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return f"File {command.path} has been edited"
-
-    @override
-    def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str:
-        result = self._insert([
-            command.path,
-            "--line", str(command.insert_line),
-            "--text", command.insert_text,
-        ])
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return f"Text inserted at line {command.insert_line} in {command.path}"
-
-    @override
-    def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str:
-        result = self._delete([command.path])
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return result.stdout
-
-    @override
-    def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str:
-        result = self._rename([command.old_path, command.new_path])
-        if result.return_code != 0:
-            raise RuntimeError(result.stderr)
-        return result.stdout
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 8131b8b..8c05ca8 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -67,8 +67,8 @@
 @dataclass
 class MemoryTool(ExternalTool):
     """
-    File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface
-    but dispatches through the text command loop (compatible with all providers).
+    File-backed memory tool that dispatches through the text command loop and
+    works consistently across providers.
 
     Subclass of ``ExternalTool`` — all command lists are empty so
     ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and
diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
index 25c717d..1ce9be6 100644
--- a/test/bot/test_memory_tool_integration.py
+++ b/test/bot/test_memory_tool_integration.py
@@ -1,7 +1,7 @@
-"""Tests for the Anthropic memory tool end-to-end flow.
+"""Tests for memory tool end-to-end flow.
 
 Unit tests (mocked API):
-  Verify wiring — auto-upgrade, tool dispatch, and memory file operations
+    Verify wiring, tool dispatch, and memory file operations
   with a mocked Anthropic client. Fast, free, no API key needed.
 
 Integration tests (real API):
@@ -28,42 +28,12 @@
 from microbots import MicroBot, BotRunResult
 from microbots.llm.llm import llm_output_format_str
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
-from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
 
 
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
-def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""):
-    """Build a mock Anthropic API response with stop_reason='tool_use'."""
-    blocks = []
-
-    if thinking_text:
-        text_block = Mock()
-        text_block.type = "text"
-        text_block.text = thinking_text
-        blocks.append(text_block)
-
-    tool_block = Mock()
-    tool_block.type = "tool_use"
-    tool_block.name = tool_name
-    tool_block.id = tool_id
-    tool_block.input = tool_input
-    tool_block.model_dump = Mock(return_value={
-        "type": "tool_use",
-        "id": tool_id,
-        "name": tool_name,
-        "input": tool_input,
-    })
-    blocks.append(tool_block)
-
-    resp = Mock()
-    resp.stop_reason = "tool_use"
-    resp.content = blocks
-    return resp
-
-
 def _make_end_turn_response(task_done, thoughts, command=""):
     """Build a mock Anthropic API response with stop_reason='end_turn'."""
     payload = json.dumps({
@@ -88,7 +58,7 @@ def _make_end_turn_response(task_done, thoughts, command=""):
 
 @pytest.mark.unit
 class TestMemoryToolWiring:
-    """Unit tests — mocked Anthropic client, real tool dispatch and file ops."""
+    """Unit tests for provider-agnostic MemoryTool dispatch with a mocked Anthropic client."""
 
     @pytest.fixture()
     def memory_dir(self, tmp_path):
@@ -101,7 +71,7 @@ def bot(self, memory_dir):
         """Create a MicroBot with Anthropic provider and a MemoryTool.
 
         The Anthropic client is mocked, but the rest of the stack is real:
-        auto-upgrade, tool dispatch, and memory file operations.
+        tool dispatch and memory file operations.
         """
         tool = MemoryTool(
             memory_dir=str(memory_dir),
@@ -131,34 +101,27 @@ def bot(self, memory_dir):
 
     # -- Upgrade verification -----------------------------------------------
 
-    def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot):
-        """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool."""
-        upgraded_tools = bot.additional_tools
-        memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)]
-        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+    def test_memory_tool_is_retained(self, bot):
+        """MemoryTool passed to MicroBot should remain provider-agnostic."""
+        memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached"
 
     def test_tool_dicts_include_memory_schema(self, bot):
-        """The LLM should have received the memory tool schema."""
-        assert len(bot.llm._tool_dicts) == 1
-        assert bot.llm._tool_dicts[0]["type"] == "memory_20250818"
+        """Provider-agnostic MemoryTool should not register Anthropic-native schemas."""
+        assert bot.llm._tool_dicts == []
 
-    # -- Create file via tool_use -------------------------------------------
+    # -- Create file via text command ---------------------------------------
 
     def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
         """LLM requests a memory create → MicroBot dispatches → file appears on disk."""
         # Sequence:
-        # 1. ask(task) → API returns tool_use (memory create)
-        # 2. ask(tool_result) → API returns end_turn (task_done=True)
+        # 1. ask(task) → API returns JSON command (memory create)
+        # 2. ask(command output) → API returns end_turn (task_done=True)
         self._mock_client.messages.create.side_effect = [
-            _make_tool_use_response(
-                tool_name="memory",
-                tool_id="tool_001",
-                tool_input={
-                    "command": "create",
-                    "path": "/memories/notes.md",
-                    "file_text": "Hello from integration test",
-                },
-                thinking_text="I'll save a note to memory.",
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="I'll save a note to memory.",
+                command='memory create /memories/notes.md "Hello from integration test"',
             ),
             _make_end_turn_response(
                 task_done=True,
@@ -181,7 +144,7 @@ def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
         assert created_file.exists(), f"Expected {created_file} to be created"
         assert created_file.read_text() == "Hello from integration test"
 
-    # -- View file via tool_use ---------------------------------------------
+    # -- View file via text command -----------------------------------------
 
     def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
         """LLM requests a memory view → MicroBot dispatches → file content returned."""
@@ -190,14 +153,10 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
         (memory_dir / "existing.md").write_text("Previously saved content")
 
         self._mock_client.messages.create.side_effect = [
-            _make_tool_use_response(
-                tool_name="memory",
-                tool_id="tool_002",
-                tool_input={
-                    "command": "view",
-                    "path": "/memories/existing.md",
-                },
-                thinking_text="Let me check my memory.",
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="Let me check my memory.",
+                command="memory view /memories/existing.md",
             ),
             _make_end_turn_response(
                 task_done=True,
@@ -213,20 +172,17 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
 
         assert result.status is True
 
-        # Verify the view result was passed back to the API as tool_result
+        # Verify the view result was passed back to the API as the next user message
         calls = self._mock_client.messages.create.call_args_list
         assert len(calls) == 2
-        # The second call should have messages including the tool_result
+        # The second call should have messages including the file content
         second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", [])
-        tool_result_msgs = [
+        user_messages = [
             m for m in second_call_messages
-            if m.get("role") == "user" and isinstance(m.get("content"), list)
-            and any(c.get("type") == "tool_result" for c in m["content"])
+            if m.get("role") == "user" and isinstance(m.get("content"), str)
         ]
-        assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call"
-        # The tool_result content should contain the file content
-        tool_result_content = tool_result_msgs[-1]["content"][0]["content"]
-        assert "Previously saved content" in tool_result_content
+        assert len(user_messages) >= 2, "Expected the command output to be sent as a user message"
+        assert "Previously saved content" in user_messages[-1]["content"]
 
     # -- Multiple tool calls in sequence ------------------------------------
 
@@ -234,25 +190,16 @@ def test_create_then_view_memory_file(self, bot, memory_dir):
         """LLM creates a file, then views it — both dispatched via MicroBot loop."""
         self._mock_client.messages.create.side_effect = [
             # Step 1: create file
-            _make_tool_use_response(
-                tool_name="memory",
-                tool_id="tool_003",
-                tool_input={
-                    "command": "create",
-                    "path": "/memories/todo.md",
-                    "file_text": "- Fix bug #42\n- Write tests",
-                },
-                thinking_text="Creating a todo list.",
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="Creating a todo list.",
+                command='memory create /memories/todo.md "- Fix bug #42\n- Write tests"',
             ),
             # Step 2: view file
-            _make_tool_use_response(
-                tool_name="memory",
-                tool_id="tool_004",
-                tool_input={
-                    "command": "view",
-                    "path": "/memories/todo.md",
-                },
-                thinking_text="Let me verify what I wrote.",
+            _make_end_turn_response(
+                task_done=False,
+                thoughts="Let me verify what I wrote.",
+                command="memory view /memories/todo.md",
             ),
             # Step 3: done
             _make_end_turn_response(
@@ -358,10 +305,10 @@ def memory_bot(self, memory_dir):
         yield bot
         del bot
 
-    def test_memory_tool_auto_upgraded(self, memory_bot):
-        """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool."""
-        memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)]
-        assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade"
+    def test_memory_tool_is_retained(self, memory_bot):
+        """MemoryTool should remain attached for Anthropic just like other providers."""
+        memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, MemoryTool)]
+        assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached"
 
     def test_create_memory_file(self, memory_bot, memory_dir):
         """MicroBot should persist a debugging plan to memory.
diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py
deleted file mode 100644
index 459afd4..0000000
--- a/test/bot/test_upgrade_tools_for_provider.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""Unit tests for AnthropicApi.upgrade_tools() method.
-
-These tests verify that plain ``MemoryTool`` instances are automatically
-replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``.
-"""
-import sys
-import os
-import logging
-import pytest
-from unittest.mock import patch, Mock
-
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
-
-from microbots.llm.anthropic_api import AnthropicApi
-from microbots.tools.tool_definitions.memory_tool import MemoryTool
-from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool:
-    return MemoryTool(
-        memory_dir=str(tmp_path / "memory"),
-        usage_instructions_to_llm=instructions,
-    )
-
-
-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestUpgradeToolsForProvider:
-
-    @pytest.fixture(autouse=True)
-    def _create_api(self):
-        with patch("microbots.llm.anthropic_api.Anthropic"):
-            self.api = AnthropicApi(system_prompt="test")
-
-    # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool --------
-
-    def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path):
-        tool = _memory_tool(tmp_path)
-
-        upgraded = self.api.upgrade_tools([tool])
-
-        assert len(upgraded) == 1
-        assert isinstance(upgraded[0], AnthropicMemoryTool)
-
-    def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path):
-        mem_dir = str(tmp_path / "my_memory")
-        tool = MemoryTool(memory_dir=mem_dir)
-
-        upgraded = self.api.upgrade_tools([tool])
-
-        assert isinstance(upgraded[0], AnthropicMemoryTool)
-        assert str(upgraded[0].memory_dir) == mem_dir
-
-    def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path):
-        custom_instructions = "custom memory instructions for test"
-        tool = _memory_tool(tmp_path, instructions=custom_instructions)
-
-        upgraded = self.api.upgrade_tools([tool])
-
-        assert upgraded[0].usage_instructions_to_llm == custom_instructions
-
-    def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path):
-        existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
-
-        upgraded = self.api.upgrade_tools([existing])
-
-        assert len(upgraded) == 1
-        assert upgraded[0] is existing
-
-    def test_non_memory_tools_are_kept_unchanged(self, tmp_path):
-        other_tool = Mock()
-        other_tool.__class__ = Mock  # not a MemoryTool subclass
-
-        upgraded = self.api.upgrade_tools([other_tool])
-
-        assert len(upgraded) == 1
-        assert upgraded[0] is other_tool
-
-    def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path):
-        plain_memory = _memory_tool(tmp_path)
-        already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2"))
-        other_tool = Mock(spec=[])
-
-        upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool])
-
-        assert len(upgraded) == 3
-        # first: should have been upgraded
-        assert isinstance(upgraded[0], AnthropicMemoryTool)
-        assert upgraded[0] is not plain_memory
-        # second: already AnthropicMemoryTool, untouched
-        assert upgraded[1] is already_upgraded
-        # third: non-memory tool, untouched
-        assert upgraded[2] is other_tool
-
-    def test_empty_tool_list_is_a_no_op(self):
-        upgraded = self.api.upgrade_tools([])
-
-        assert upgraded == []
-
-    def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog):
-        tool1 = _memory_tool(tmp_path)
-        tmp_path2 = tmp_path / "sub"
-        tmp_path2.mkdir()
-        tool2 = _memory_tool(tmp_path2)
-
-        with caplog.at_level(logging.INFO):
-            self.api.upgrade_tools([tool1, tool2])
-
-        upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message]
-        assert len(upgrade_logs) == 2
diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py
deleted file mode 100644
index 81db12c..0000000
--- a/test/tools/tool_definitions/test_anthropic_memory_tool.py
+++ /dev/null
@@ -1,421 +0,0 @@
-"""
-Unit tests for AnthropicMemoryTool.
-
-Covers:
-  - __init__: memory_dir / usage_instructions forwarding and defaults
-  - is_model_supported
-  - is_invoked
-  - clear_all / clear_all_memory (SDK override)
-  - SDK overrides: view, create, str_replace, insert, delete, rename
-    (happy-path + RuntimeError on failure)
-"""
-import logging
-import pytest
-
-from anthropic.types.beta import (
-    BetaMemoryTool20250818CreateCommand,
-    BetaMemoryTool20250818DeleteCommand,
-    BetaMemoryTool20250818InsertCommand,
-    BetaMemoryTool20250818RenameCommand,
-    BetaMemoryTool20250818StrReplaceCommand,
-    BetaMemoryTool20250818ViewCommand,
-)
-
-from microbots.tools.tool_definitions.anthropic_memory_tool import (
-    DEFAULT_MEMORY_INSTRUCTIONS,
-    AnthropicMemoryTool,
-)
-from microbots.tools.tool_definitions.memory_tool import MemoryTool
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def make_tool(tmp_path) -> AnthropicMemoryTool:
-    return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory"))
-
-
-# ---------------------------------------------------------------------------
-# __init__
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolInit:
-
-    def test_is_subclass_of_memory_tool(self, tmp_path):
-        assert isinstance(make_tool(tmp_path), MemoryTool)
-
-    def test_memory_dir_is_forwarded(self, tmp_path):
-        mem_dir = str(tmp_path / "my_memory")
-        tool = AnthropicMemoryTool(memory_dir=mem_dir)
-        assert str(tool._memory_dir) == mem_dir
-
-    def test_memory_dir_is_created_on_init(self, tmp_path):
-        mem_dir = tmp_path / "new_memory"
-        assert not mem_dir.exists()
-        AnthropicMemoryTool(memory_dir=str(mem_dir))
-        assert mem_dir.exists()
-
-    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
-        from pathlib import Path
-        monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
-        tool = AnthropicMemoryTool()
-        assert tool._memory_dir == tmp_path / ".microbots" / "memory"
-
-    def test_custom_usage_instructions_are_stored(self, tmp_path):
-        custom = "custom instructions"
-        tool = AnthropicMemoryTool(
-            memory_dir=str(tmp_path / "memory"),
-            usage_instructions=custom,
-        )
-        assert tool.usage_instructions_to_llm == custom
-
-    def test_default_usage_instructions_are_applied_when_none(self, tmp_path):
-        tool = make_tool(tmp_path)
-        assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS
-
-
-# ---------------------------------------------------------------------------
-# is_model_supported
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolIsModelSupported:
-
-    def test_returns_true_for_claude_models(self, tmp_path):
-        tool = make_tool(tmp_path)
-        for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"):
-            assert tool.is_model_supported(model) is True
-
-    def test_returns_false_for_non_claude_models(self, tmp_path):
-        tool = make_tool(tmp_path)
-        for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""):
-            assert tool.is_model_supported(model) is False
-
-
-# ---------------------------------------------------------------------------
-# is_invoked
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolIsInvoked:
-
-    def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path):
-        tool = make_tool(tmp_path)
-        import json
-        cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]})
-        assert tool.is_invoked(cmd) is True
-
-    def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path):
-        tool = make_tool(tmp_path)
-        import json
-        cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]})
-        assert tool.is_invoked(cmd) is False
-
-    def test_returns_false_for_plain_commands(self, tmp_path):
-        tool = make_tool(tmp_path)
-        for cmd in ("memory view /memories", "memory clear", "anything", ""):
-            assert tool.is_invoked(cmd) is False
-
-
-# ---------------------------------------------------------------------------
-# clear_all / clear_all_memory
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolClearAll:
-
-    def test_clear_all_removes_all_files(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "a.md").write_text("a")
-        (tool._memory_dir / "b.md").write_text("b")
-
-        tool.clear_all()
-
-        assert list(tool._memory_dir.iterdir()) == []
-
-    def test_clear_all_leaves_memory_dir_intact(self, tmp_path):
-        tool = make_tool(tmp_path)
-        tool.clear_all()
-        assert tool._memory_dir.exists()
-
-    def test_clear_all_logs_info(self, tmp_path, caplog):
-        tool = make_tool(tmp_path)
-        with caplog.at_level(logging.INFO):
-            tool.clear_all()
-        assert "AnthropicMemoryTool" in caplog.text
-        assert "cleared" in caplog.text
-
-    def test_clear_all_memory_returns_string(self, tmp_path):
-        tool = make_tool(tmp_path)
-        result = tool.clear_all_memory()
-        assert result == "All memory cleared"
-
-    def test_clear_all_memory_removes_files(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("data")
-
-        tool.clear_all_memory()
-
-        assert list(tool._memory_dir.iterdir()) == []
-
-
-# ---------------------------------------------------------------------------
-# view (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolView:
-
-    def test_view_returns_file_contents(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "notes.md").write_text("hello\nworld\n")
-
-        cmd = BetaMemoryTool20250818ViewCommand(
-            command="view", path="/memories/notes.md", view_range=None
-        )
-        result = tool.view(cmd)
-
-        assert "hello" in result
-        assert "world" in result
-
-    def test_view_with_view_range(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")
-
-        cmd = BetaMemoryTool20250818ViewCommand(
-            command="view", path="/memories/f.md", view_range=[2, 4]
-        )
-        result = tool.view(cmd)
-
-        assert "b" in result
-        assert "d" in result
-        assert "a" not in result
-        assert "e" not in result
-
-    def test_view_raises_runtime_error_on_failure(self, tmp_path):
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818ViewCommand(
-            command="view", path="/memories/nonexistent.md", view_range=None
-        )
-        with pytest.raises(RuntimeError):
-            tool.view(cmd)
-
-
-# ---------------------------------------------------------------------------
-# create (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolCreate:
-
-    def test_create_writes_file(self, tmp_path):
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818CreateCommand(
-            command="create", path="/memories/new.md", file_text="hello world"
-        )
-        result = tool.create(cmd)
-
-        assert "new.md" in result
-        assert (tool._memory_dir / "new.md").read_text() == "hello world"
-
-    def test_create_raises_runtime_error_on_failure(self, tmp_path):
-        """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by
-        mocking _create to return a non-zero CmdReturn."""
-        from unittest.mock import patch
-        from microbots.environment.Environment import CmdReturn
-
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818CreateCommand(
-            command="create", path="/memories/new.md", file_text="x"
-        )
-        with patch.object(
-            tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1)
-        ):
-            with pytest.raises(RuntimeError, match="disk full"):
-                tool.create(cmd)
-
-
-# ---------------------------------------------------------------------------
-# str_replace (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolStrReplace:
-
-    def test_str_replace_edits_file(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("hello world")
-
-        cmd = BetaMemoryTool20250818StrReplaceCommand(
-            command="str_replace",
-            path="/memories/f.md",
-            old_str="hello",
-            new_str="goodbye",
-        )
-        result = tool.str_replace(cmd)
-
-        assert "f.md" in result
-        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
-
-    def test_str_replace_raises_runtime_error_on_failure(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("hello world")
-        cmd = BetaMemoryTool20250818StrReplaceCommand(
-            command="str_replace",
-            path="/memories/f.md",
-            old_str="not present",
-            new_str="x",
-        )
-        with pytest.raises(RuntimeError):
-            tool.str_replace(cmd)
-
-
-# ---------------------------------------------------------------------------
-# insert (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolInsert:
-
-    def test_insert_prepends_line(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("line1\nline2\n")
-
-        cmd = BetaMemoryTool20250818InsertCommand(
-            command="insert",
-            path="/memories/f.md",
-            insert_line=0,
-            insert_text="prepended",
-        )
-        result = tool.insert(cmd)
-
-        assert "0" in result or "prepended" in result or "f.md" in result
-        lines = (tool._memory_dir / "f.md").read_text().splitlines()
-        assert lines[0] == "prepended"
-
-    def test_insert_raises_runtime_error_on_failure(self, tmp_path):
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818InsertCommand(
-            command="insert",
-            path="/memories/missing.md",
-            insert_line=0,
-            insert_text="x",
-        )
-        with pytest.raises(RuntimeError):
-            tool.insert(cmd)
-
-
-# ---------------------------------------------------------------------------
-# delete (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolDelete:
-
-    def test_delete_removes_file(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("data")
-
-        cmd = BetaMemoryTool20250818DeleteCommand(
-            command="delete", path="/memories/f.md"
-        )
-        tool.delete(cmd)
-
-        assert not (tool._memory_dir / "f.md").exists()
-
-    def test_delete_raises_runtime_error_on_failure(self, tmp_path):
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818DeleteCommand(
-            command="delete", path="/memories/nonexistent.md"
-        )
-        with pytest.raises(RuntimeError):
-            tool.delete(cmd)
-
-
-# ---------------------------------------------------------------------------
-# rename (SDK override)
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolRename:
-
-    def test_rename_moves_file(self, tmp_path):
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "old.md").write_text("content")
-
-        cmd = BetaMemoryTool20250818RenameCommand(
-            command="rename",
-            old_path="/memories/old.md",
-            new_path="/memories/new.md",
-        )
-        tool.rename(cmd)
-
-        assert not (tool._memory_dir / "old.md").exists()
-        assert (tool._memory_dir / "new.md").read_text() == "content"
-
-    def test_rename_raises_runtime_error_on_failure(self, tmp_path):
-        tool = make_tool(tmp_path)
-        cmd = BetaMemoryTool20250818RenameCommand(
-            command="rename",
-            old_path="/memories/missing.md",
-            new_path="/memories/new.md",
-        )
-        with pytest.raises(RuntimeError):
-            tool.rename(cmd)
-
-
-# ---------------------------------------------------------------------------
-# invoke — non-memory tool calls are skipped
-# ---------------------------------------------------------------------------
-
-@pytest.mark.unit
-class TestAnthropicMemoryToolInvoke:
-
-    def test_invoke_skips_non_memory_tool_calls(self, tmp_path):
-        """The ``if tc["name"] != "memory": continue`` branch is exercised
-        when native_tool_calls contains a non-memory tool."""
-        import json
-        from unittest.mock import Mock
-
-        tool = make_tool(tmp_path)
-        (tool._memory_dir / "f.md").write_text("hello")
-
-        command = json.dumps({
-            "native_tool_calls": [
-                {"name": "bash", "id": "tu_1", "input": {"command": "ls"}},
-                {"name": "memory", "id": "tu_2", "input": {
-                    "command": "view", "path": "/memories/f.md", "view_range": None,
-                }},
-            ]
-        })
-
-        result = tool.invoke(command, parent_bot=Mock())
-
-        assert result.return_code == 0
-        # Only the memory call should produce output; bash should be skipped
-        assert "hello" in result.stdout
-
-    def test_invoke_catches_exception_from_tool_call(self, tmp_path):
-        """The ``except Exception`` branch is exercised when tool.call() raises."""
-        import json
-        from unittest.mock import Mock, patch
-
-        tool = make_tool(tmp_path)
-
-        command = json.dumps({
-            "native_tool_calls": [
-                {"name": "memory", "id": "tu_1", "input": {
-                    "command": "view", "path": "/memories/nonexistent.md", "view_range": None,
-                }},
-            ]
-        })
-
-        # Force call() to raise an exception
-        with patch.object(tool, "call", side_effect=RuntimeError("boom")):
-            result = tool.invoke(command, parent_bot=Mock())
-
-        assert result.return_code == 0
-        assert "Error executing tool 'memory'" in result.stdout
-        assert "boom" in result.stdout

From 364be952c1538d7d180b2de5fed7efd3df2534cd Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 11 Mar 2026 12:57:29 +0000
Subject: [PATCH 12/20] Add test in MemoryTool

---
 test/tools/tool_definitions/test_memory_tool.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 089ca52..584aa54 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -92,6 +92,19 @@ def test_resolve_rejects_path_traversal(self, tmp_path):
         with pytest.raises(ValueError, match="Path traversal"):
             tool._resolve("/memories/../../etc/passwd")
 
+    def test_resolve_rejects_symlink_escaping_memory_dir(self, tmp_path):
+        """A symlink inside memory_dir that resolves outside must be rejected
+        by the startswith guard (not the '..' check)."""
+        tool = make_tool(tmp_path)
+        outside = tmp_path / "outside"
+        outside.mkdir()
+        (outside / "secret.txt").write_text("sensitive")
+        # Create a symlink inside the memory dir pointing outside
+        link = tool._memory_dir / "escape"
+        link.symlink_to(outside)
+        with pytest.raises(ValueError, match="Path traversal"):
+            tool._resolve("/memories/escape/secret.txt")
+
     def test_resolve_rejects_non_memory_paths(self, tmp_path):
         tool = make_tool(tmp_path)
         for bad in ("/workdir/file", "/home/user/file", "/tmp/file"):

From 61704c9977a8d65fc5b699cc77256c10a2b31de9 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 12 Mar 2026 05:25:45 +0000
Subject: [PATCH 13/20] Refactor AnthropicApi and LLMInterface: remove unused
 parameters and methods

---
 src/microbots/MicroBot.py                |   4 +-
 src/microbots/llm/anthropic_api.py       | 129 ++---------
 src/microbots/llm/llm.py                 |   9 -
 src/microbots/tools/tool.py              |   4 +-
 test/bot/test_memory_tool_integration.py |   4 -
 test/llm/test_anthropic_api.py           | 276 -----------------------
 test/llm/test_llm.py                     |  31 +--
 7 files changed, 17 insertions(+), 440 deletions(-)

diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py
index f2708b1..3ace5a4 100644
--- a/src/microbots/MicroBot.py
+++ b/src/microbots/MicroBot.py
@@ -335,9 +335,7 @@ def _create_llm(self):
             )
         elif self.model_provider == ModelProvider.ANTHROPIC:
             self.llm = AnthropicApi(
-                system_prompt=system_prompt_with_tools,
-                deployment_name=self.deployment_name,
-                additional_tools=self.additional_tools,
+                system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name
             )
         # No Else case required as model provider is already validated using _validate_model_and_provider
 
diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
index 2da9bab..f40118a 100644
--- a/src/microbots/llm/anthropic_api.py
+++ b/src/microbots/llm/anthropic_api.py
@@ -1,9 +1,7 @@
 import json
 import os
-import re
 from dataclasses import asdict
 from logging import getLogger
-from typing import List, Optional
 
 from dotenv import load_dotenv
 from anthropic import Anthropic
@@ -18,29 +16,9 @@
 api_key = os.getenv("ANTHROPIC_API_KEY")
 
 
-
 class AnthropicApi(LLMInterface):
 
-    def __init__(
-        self,
-        system_prompt: str,
-        deployment_name: str = deployment_name,
-        max_retries: int = 3,
-        additional_tools: Optional[List] = None,
-    ):
-        """
-        Parameters
-        ----------
-        system_prompt : str
-            System prompt for the LLM.
-        deployment_name : str
-            The Anthropic model deployment name.
-        max_retries : int
-            Maximum number of retries for invalid LLM responses.
-        additional_tools : Optional[List]
-            Tool objects passed from MicroBot. Any tools exposing a native
-            Anthropic schema via ``to_dict()`` are forwarded to the API.
-        """
+    def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3):
         self.ai_client = Anthropic(
             api_key=api_key,
             base_url=endpoint
@@ -49,111 +27,30 @@ def __init__(
         self.system_prompt = system_prompt
         self.messages = []
 
-        # Preserve tool instances as provided and extract native Anthropic schemas.
-        tools = additional_tools or []
-        upgraded = self.upgrade_tools(tools)
-        if additional_tools is not None:
-            additional_tools[:] = upgraded
-        self._tool_dicts = [
-            t.to_dict() for t in upgraded
-            if callable(getattr(t, "to_dict", None))
-        ]
-        self._pending_tool_response = None
-
         # Set these values here. This logic will be handled in the parent class.
         self.max_retries = max_retries
         self.retries = 0
 
-    # ---------------------------------------------------------------------- #
-    # Internal helpers
-    # ---------------------------------------------------------------------- #
-
-    def _call_api(self) -> object:
-        """Call the Anthropic messages API, including tool definitions when present."""
-        kwargs = dict(
-            model=self.deployment_name,
-            system=self.system_prompt,
-            messages=self.messages,
-            max_tokens=4096,
-        )
-
-        if self._tool_dicts:
-            kwargs["tools"] = self._tool_dicts
-
-        return self.ai_client.messages.create(**kwargs)
-
-    def _append_tool_result(self, response, result_text: str) -> None:
-        """Append the assistant tool_use turn and the corresponding tool_result user turn.
-
-        Called when the caller provides the tool execution result via
-        the next ``ask()`` call.
-        """
-        assistant_content = [block.model_dump() for block in response.content]
-        self.messages.append({"role": "assistant", "content": assistant_content})
-
-        tool_results = []
-        for block in response.content:
-            if block.type != "tool_use":
-                continue
-            tool_results.append({
-                "type": "tool_result",
-                "tool_use_id": block.id,
-                "content": str(result_text),
-            })
-
-        self.messages.append({"role": "user", "content": tool_results})
-
-    # ---------------------------------------------------------------------- #
-    # Public interface
-    # ---------------------------------------------------------------------- #
-
-    def ask(self, message: str) -> LLMAskResponse:
+    def ask(self, message) -> LLMAskResponse:
         self.retries = 0  # reset retries for each ask. Handled in parent class.
 
-        if self._pending_tool_response:
-            # Previous response was tool_use — format this message as tool results.
-            self._append_tool_result(self._pending_tool_response, message)
-            self._pending_tool_response = None
-        else:
-            self.messages.append({"role": "user", "content": message})
+        self.messages.append({"role": "user", "content": message})
 
         valid = False
         while not valid:
-            response = self._call_api()
-
-            if response.stop_reason == "tool_use":
-                # Return tool call info as an LLMAskResponse so the
-                # caller (MicroBot.run) can dispatch the tool.
-                self._pending_tool_response = response
-
-                thoughts = ""
-                for block in response.content:
-                    if block.type == "text":
-                        thoughts = block.text
-                        break
-
-                tool_calls = []
-                for block in response.content:
-                    if block.type == "tool_use":
-                        tool_calls.append({
-                            "name": block.name,
-                            "id": block.id,
-                            "input": block.input,
-                        })
-
-                command = json.dumps({"native_tool_calls": tool_calls})
-                return LLMAskResponse(task_done=False, thoughts=thoughts, command=command)
-
-            # Extract text content from the final response
-            response_text = ""
-            for block in response.content:
-                if block.type == "text":
-                    response_text = block.text
-                    break
-
+            response = self.ai_client.messages.create(
+                model=self.deployment_name,
+                system=self.system_prompt,
+                messages=self.messages,
+                max_tokens=4096,
+            )
+
+            # Extract text content from response
+            response_text = response.content[0].text if response.content else ""
             logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500])
 
             # Try to extract JSON if wrapped in markdown code blocks
+            import re
             json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
             if json_match:
                 response_text = json_match.group(1)
diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py
index e7e5c22..2800790 100644
--- a/src/microbots/llm/llm.py
+++ b/src/microbots/llm/llm.py
@@ -29,15 +29,6 @@ def ask(self, message: str) -> LLMAskResponse:
     def clear_history(self) -> bool:
         pass
 
-    def upgrade_tools(self, tools: list) -> list:
-        """Upgrade tools for the specific LLM provider.
-
-        The default implementation is a no-op.  Subclasses (e.g.
-        ``AnthropicApi``) override this to swap provider-agnostic tools
-        with their native equivalents.
-        """
-        return tools
-
     def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
 
         if self.retries >= self.max_retries:
diff --git a/src/microbots/tools/tool.py b/src/microbots/tools/tool.py
index 6b8351f..39eaa4e 100644
--- a/src/microbots/tools/tool.py
+++ b/src/microbots/tools/tool.py
@@ -40,8 +40,8 @@ class ToolAbstract(ABC):
     Tool hierarchy:
         ToolAbstract (ABC)
         ├── Tool (ToolAbstract)   — Docker sandbox tools (install_commands, env_variables, etc.)
-        └── ExternalTool (ToolAbstract)   — LLM-native tools (get_tool_definition, execute)
-                └── AnthropicMemoryTool
+        └── ExternalTool (ToolAbstract)   — Host-side tools (get_tool_definition, execute)
+                └── MemoryTool
     """
     # TODO: Handle different instructions based on the platform (linux flavours, windows, mac)
     # TODO: Add versioning to tools
diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
index 1ce9be6..3debe6c 100644
--- a/test/bot/test_memory_tool_integration.py
+++ b/test/bot/test_memory_tool_integration.py
@@ -106,10 +106,6 @@ def test_memory_tool_is_retained(self, bot):
         memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)]
         assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached"
 
-    def test_tool_dicts_include_memory_schema(self, bot):
-        """Provider-agnostic MemoryTool should not register Anthropic-native schemas."""
-        assert bot.llm._tool_dicts == []
-
     # -- Create file via text command ---------------------------------------
 
     def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 2c11966..4db2d96 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -535,282 +535,6 @@ def test_anthropic_api_clear_history_integration(self):
         assert len(api.messages) == 0  # Anthropic doesn't store system in messages
 
 
-# ============================================================================
-# Tests for native_tools support (new changes)
-# ============================================================================
-
-@pytest.mark.unit
-class TestAnthropicApiToolDictsInit:
-    """Tests for __init__ tool upgrade and tool_dicts extraction."""
-
-    @pytest.fixture(autouse=True)
-    def _use_patch(self, patch_anthropic_config):
-        pass
-
-    def test_init_without_additional_tools_has_empty_tool_dicts(self):
-        api = AnthropicApi(system_prompt="test")
-
-        assert api._tool_dicts == []
-
-    def test_init_with_none_additional_tools_has_empty_tool_dicts(self):
-        api = AnthropicApi(system_prompt="test", additional_tools=None)
-
-        assert api._tool_dicts == []
-
-    def test_init_with_tool_having_to_dict_extracts_dicts(self):
-        tool = Mock()
-        tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"}
-        # Ensure it's not a MemoryTool so upgrade_tools won't touch it
-        tool.__class__ = Mock
-
-        api = AnthropicApi(system_prompt="test", additional_tools=[tool])
-
-        assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}]
-
-    def test_init_sets_pending_tool_response_to_none(self):
-        api = AnthropicApi(system_prompt="test")
-
-        assert api._pending_tool_response is None
-
-
-@pytest.mark.unit
-class TestAnthropicApiCallApiWithTools:
-    """Tests for _call_api including/excluding the tools kwarg."""
-
-    @pytest.fixture(autouse=True)
-    def _use_patch(self, patch_anthropic_config):
-        pass
-
-    def test_call_api_without_tools_omits_tools_kwarg(self):
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
-        api.messages = [{"role": "user", "content": "hello"}]
-        api.ai_client.messages.create = Mock(return_value=Mock())
-
-        api._call_api()
-
-        call_kwargs = api.ai_client.messages.create.call_args[1]
-        assert "tools" not in call_kwargs
-
-    def test_call_api_with_tool_dicts_passes_them(self):
-        dicts = [{"name": "memory", "type": "memory_20250818"}]
-        api = AnthropicApi(system_prompt="test", deployment_name="claude-3")
-        api._tool_dicts = dicts
-        api.messages = [{"role": "user", "content": "hello"}]
-        api.ai_client.messages.create = Mock(return_value=Mock())
-
-        api._call_api()
-
-        call_kwargs = api.ai_client.messages.create.call_args[1]
-        assert "tools" in call_kwargs
-        assert call_kwargs["tools"] == dicts
-
-
-@pytest.mark.unit
-class TestAnthropicApiAppendToolResult:
-    """Tests for _append_tool_result."""
-
-    @pytest.fixture(autouse=True)
-    def _use_patch(self, patch_anthropic_config):
-        pass
-
-    @staticmethod
-    def _tool_use_block(name, tool_id="tu_001", input_data=None):
-        block = Mock()
-        block.type = "tool_use"
-        block.name = name
-        block.id = tool_id
-        block.input = input_data or {}
-        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name}
-        return block
-
-    @staticmethod
-    def _text_block(text="hello"):
-        block = Mock()
-        block.type = "text"
-        block.text = text
-        block.model_dump.return_value = {"type": "text", "text": text}
-        return block
-
-    def test_appends_assistant_message_first(self):
-        api = AnthropicApi(system_prompt="test")
-
-        response = Mock()
-        response.content = [self._tool_use_block("memory")]
-        api._append_tool_result(response, "ok")
-
-        assert api.messages[0]["role"] == "assistant"
-
-    def test_appends_tool_result_user_message(self):
-        api = AnthropicApi(system_prompt="test")
-
-        response = Mock()
-        response.content = [self._tool_use_block("memory", tool_id="tu_abc")]
-        api._append_tool_result(response, "file listing")
-
-        user_msg = api.messages[1]
-        assert user_msg["role"] == "user"
-        assert user_msg["content"][0]["type"] == "tool_result"
-        assert user_msg["content"][0]["tool_use_id"] == "tu_abc"
-        assert user_msg["content"][0]["content"] == "file listing"
-
-    def test_skips_non_tool_use_content_blocks(self):
-        api = AnthropicApi(system_prompt="test")
-
-        response = Mock()
-        response.content = [
-            self._text_block("thinking..."),
-            self._tool_use_block("memory", tool_id="tu_only"),
-        ]
-        api._append_tool_result(response, "result")
-
-        tool_results = api.messages[1]["content"]
-        assert len(tool_results) == 1
-        assert tool_results[0]["tool_use_id"] == "tu_only"
-
-    def test_handles_multiple_tool_use_blocks(self):
-        api = AnthropicApi(system_prompt="test")
-
-        response = Mock()
-        response.content = [
-            self._tool_use_block("memory", tool_id="id_1"),
-            self._tool_use_block("bash", tool_id="id_2"),
-        ]
-        api._append_tool_result(response, "combined result")
-
-        results = api.messages[1]["content"]
-        assert len(results) == 2
-        assert results[0]["tool_use_id"] == "id_1"
-        assert results[0]["content"] == "combined result"
-        assert results[1]["tool_use_id"] == "id_2"
-        assert results[1]["content"] == "combined result"
-
-
-@pytest.mark.unit
-class TestAnthropicApiAskWithToolUse:
-    """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results."""
-
-    @pytest.fixture(autouse=True)
-    def _use_patch(self, patch_anthropic_config):
-        pass
-
-    @staticmethod
-    def _tool_use_response(tool_name, tool_id, input_data=None):
-        block = Mock()
-        block.type = "tool_use"
-        block.name = tool_name
-        block.id = tool_id
-        block.input = input_data or {}
-        block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name}
-        response = Mock()
-        response.stop_reason = "tool_use"
-        response.content = [block]
-        return response
-
-    @staticmethod
-    def _text_response(json_dict):
-        block = Mock()
-        block.type = "text"
-        block.text = json.dumps(json_dict)
-        block.model_dump.return_value = {"type": "text", "text": block.text}
-        response = Mock()
-        response.stop_reason = "end_turn"
-        response.content = [block]
-        return response
-
-    def test_ask_returns_tool_use_as_ask_response(self):
-        dicts = [{"name": "memory", "type": "memory_20250818"}]
-        api = AnthropicApi(system_prompt="test")
-        api._tool_dicts = dicts
-
-        tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"})
-        api.ai_client.messages.create = Mock(return_value=tool_resp)
-
-        result = api.ask("do the task")
-
-        assert result.task_done is False
-        assert '"native_tool_calls"' in result.command
-        parsed = json.loads(result.command)
-        assert parsed["native_tool_calls"][0]["name"] == "memory"
-        assert parsed["native_tool_calls"][0]["id"] == "tu_1"
-        assert api._pending_tool_response is tool_resp
-
-    def test_ask_stores_pending_tool_response(self):
-        api = AnthropicApi(system_prompt="test")
-        api._tool_dicts = [{"name": "memory"}]
-
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        api.ai_client.messages.create = Mock(return_value=tool_resp)
-
-        api.ask("do it")
-
-        assert api._pending_tool_response is tool_resp
-
-    def test_ask_with_pending_tool_response_formats_tool_result(self):
-        api = AnthropicApi(system_prompt="test")
-        api._tool_dicts = [{"name": "memory"}]
-
-        tool_resp = self._tool_use_response("memory", "tu_1")
-        final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""})
-        api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp])
-
-        # First ask — returns tool_use
-        api.ask("do the task")
-
-        # Second ask — sends tool result, formats as tool_result
-        result = api.ask("viewed /memories")
-
-        assert result.command == "ls /"
-        assert api._pending_tool_response is None
-
-        # Check messages contain the tool_result
-        tool_result_msgs = [
-            m for m in api.messages
-            if m["role"] == "user" and isinstance(m["content"], list)
-        ]
-        assert len(tool_result_msgs) == 1
-        assert tool_result_msgs[0]["content"][0]["type"] == "tool_result"
-        assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1"
-
-    def test_ask_without_tool_use_works_normally(self):
-        api = AnthropicApi(system_prompt="test")
-
-        final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""})
-        api.ai_client.messages.create = Mock(return_value=final_resp)
-
-        result = api.ask("where am I?")
-
-        assert api.ai_client.messages.create.call_count == 1
-        assert result.command == "pwd"
-        assert api._pending_tool_response is None
-
-    def test_ask_extracts_thoughts_from_tool_use_response(self):
-        api = AnthropicApi(system_prompt="test")
-        api._tool_dicts = [{"name": "memory"}]
-
-        # Build a tool_use response with a text block for thoughts
-        text_block = Mock()
-        text_block.type = "text"
-        text_block.text = "Let me check memory first"
-        text_block.model_dump.return_value = {"type": "text", "text": text_block.text}
-
-        tool_block = Mock()
-        tool_block.type = "tool_use"
-        tool_block.name = "memory"
-        tool_block.id = "tu_1"
-        tool_block.input = {}
-        tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"}
-
-        response = Mock()
-        response.stop_reason = "tool_use"
-        response.content = [text_block, tool_block]
-
-        api.ai_client.messages.create = Mock(return_value=response)
-
-        result = api.ask("do the task")
-
-        assert result.thoughts == "Let me check memory first"
-
-
 if __name__ == "__main__":
     pytest.main([__file__, "-v"])
 
diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py
index af5a7d2..aa64fe7 100644
--- a/test/llm/test_llm.py
+++ b/test/llm/test_llm.py
@@ -741,33 +741,4 @@ def test_task_done_true_with_not_none_command_field(self, llm):
         assert valid is False
         assert llm_response is None
         assert llm.retries == 1
-        assert len(llm.messages) == 1
-
-
-@pytest.mark.unit
-class TestUpgradeToolsDefault:
-    """Tests for the default LLMInterface.upgrade_tools no-op implementation."""
-
-    @pytest.fixture
-    def llm(self):
-        return ConcreteLLM()
-
-    def test_returns_same_list(self, llm):
-        """Default upgrade_tools returns the input list unchanged."""
-        tools = ["tool_a", "tool_b"]
-        result = llm.upgrade_tools(tools)
-        assert result is tools
-
-    def test_empty_list(self, llm):
-        """Default upgrade_tools handles an empty list."""
-        tools = []
-        result = llm.upgrade_tools(tools)
-        assert result == []
-
-    def test_preserves_tool_order_and_identity(self, llm):
-        """Default upgrade_tools does not reorder or copy elements."""
-        sentinel = object()
-        tools = [sentinel, "other"]
-        result = llm.upgrade_tools(tools)
-        assert result[0] is sentinel
-        assert result[1] == "other"
\ No newline at end of file
+        assert len(llm.messages) == 1
\ No newline at end of file

From 7d6c782a832fca7561ed4b304fa852c7049693f3 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 12 Mar 2026 05:51:17 +0000
Subject: [PATCH 14/20] Update comment

---
 src/microbots/tools/tool.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/microbots/tools/tool.py b/src/microbots/tools/tool.py
index 39eaa4e..6b8351f 100644
--- a/src/microbots/tools/tool.py
+++ b/src/microbots/tools/tool.py
@@ -40,8 +40,8 @@ class ToolAbstract(ABC):
     Tool hierarchy:
         ToolAbstract (ABC)
         ├── Tool (ToolAbstract)   — Docker sandbox tools (install_commands, env_variables, etc.)
-        └── ExternalTool (ToolAbstract)   — Host-side tools (get_tool_definition, execute)
-                └── MemoryTool
+        └── ExternalTool (ToolAbstract)   — LLM-native tools (get_tool_definition, execute)
+                └── AnthropicMemoryTool
     """
     # TODO: Handle different instructions based on the platform (linux flavours, windows, mac)
     # TODO: Add versioning to tools

From 2de384416d99d215da74c59c1c62b38ea008f81c Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 12 Mar 2026 08:34:26 +0000
Subject: [PATCH 15/20] Refactor test files: remove unnecessary imports and
 clean up code

---
 pytest.ini                                    |  1 -
 .../tools/tool_definitions/__init__.py        |  2 +-
 .../tools/tool_definitions/memory_tool.py     |  1 +
 test/bot/test_memory_tool_integration.py      |  5 -----
 test/llm/test_anthropic_api.py                | 22 -------------------
 test/llm/test_llm.py                          |  3 ++-
 .../tool_definitions/test_memory_tool.py      |  4 ----
 7 files changed, 4 insertions(+), 34 deletions(-)

diff --git a/pytest.ini b/pytest.ini
index 80758ec..d0e4f1f 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -11,6 +11,5 @@ addopts =
 markers =
     unit: Unit tests
     integration: Integration tests
-    anthropic_integration: Integration tests requiring a real Anthropic API key
     slow: Slow tests
     docker: marks tests that require a running Docker daemon and pull container images
diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py
index 44e7592..1aa24ce 100644
--- a/src/microbots/tools/tool_definitions/__init__.py
+++ b/src/microbots/tools/tool_definitions/__init__.py
@@ -1 +1 @@
-from microbots.tools.tool_definitions.memory_tool import MemoryTool
\ No newline at end of file
+from microbots.tools.tool_definitions.memory_tool import MemoryTool
diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 8c05ca8..c584f30 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -61,6 +61,7 @@
 - Paths must start with /memories/.
 - memory create overwrites if the file already exists.
 - memory str_replace requires the old text to appear exactly once.
+- In memory view, use --end -1 to read through the end of the file.
 """
 
 
diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
index 3debe6c..b20c32b 100644
--- a/test/bot/test_memory_tool_integration.py
+++ b/test/bot/test_memory_tool_integration.py
@@ -12,7 +12,6 @@
 
 import json
 import os
-import sys
 from pathlib import Path
 from unittest.mock import Mock, patch
 
@@ -21,10 +20,6 @@
 
 load_dotenv()
 
-sys.path.insert(
-    0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))
-)
-
 from microbots import MicroBot, BotRunResult
 from microbots.llm.llm import llm_output_format_str
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
index 4db2d96..674294c 100644
--- a/test/llm/test_anthropic_api.py
+++ b/test/llm/test_anthropic_api.py
@@ -98,9 +98,7 @@ def test_ask_successful_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo 'hello'",
@@ -135,9 +133,7 @@ def test_ask_with_task_done_true(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": True,
             "command": "",
@@ -161,16 +157,12 @@ def test_ask_with_retry_on_invalid_response(self):
 
         # Mock the Anthropic client to return invalid then valid response
         mock_invalid_response = Mock()
-        mock_invalid_response.stop_reason = "end_turn"
         mock_invalid_content = Mock()
-        mock_invalid_content.type = "text"
         mock_invalid_content.text = "invalid json"
         mock_invalid_response.content = [mock_invalid_content]
 
         mock_valid_response = Mock()
-        mock_valid_response.stop_reason = "end_turn"
         mock_valid_content = Mock()
-        mock_valid_content.type = "text"
         mock_valid_content.text = json.dumps({
             "task_done": False,
             "command": "ls -la",
@@ -201,9 +193,7 @@ def test_ask_appends_user_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
@@ -228,9 +218,7 @@ def test_ask_appends_assistant_response_as_json(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo test",
@@ -259,9 +247,7 @@ def test_ask_uses_asdict_for_response(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         response_dict = {
             "task_done": True,
             "command": "",
@@ -291,9 +277,7 @@ def test_ask_resets_retries_to_zero(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "ls",
@@ -315,9 +299,7 @@ def test_ask_extracts_json_from_markdown(self):
 
         # Mock response with markdown-wrapped JSON
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = """Here's the response:
 ```json
 {
@@ -438,9 +420,7 @@ def test_ask_with_empty_message(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "echo ''",
@@ -463,9 +443,7 @@ def test_multiple_ask_calls_append_messages(self):
 
         # Mock the Anthropic client response
         mock_response = Mock()
-        mock_response.stop_reason = "end_turn"
         mock_content = Mock()
-        mock_content.type = "text"
         mock_content.text = json.dumps({
             "task_done": False,
             "command": "pwd",
diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py
index aa64fe7..bd1b82d 100644
--- a/test/llm/test_llm.py
+++ b/test/llm/test_llm.py
@@ -741,4 +741,5 @@ def test_task_done_true_with_not_none_command_field(self, llm):
         assert valid is False
         assert llm_response is None
         assert llm.retries == 1
-        assert len(llm.messages) == 1
\ No newline at end of file
+        assert len(llm.messages) == 1
+        assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
\ No newline at end of file
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 584aa54..6345b67 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -4,14 +4,10 @@
 All tests use pytest's tmp_path fixture so they are isolated from the
 user's real ~/.microbots/memory directory.
 """
-import sys
-import os
 import pytest
 from pathlib import Path
 from unittest.mock import Mock
 
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src")))
-
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 from microbots.environment.Environment import CmdReturn
 

From ee0b08065f0f6305b6550d4578a491a795e6144b Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 12 Mar 2026 09:47:25 +0000
Subject: [PATCH 16/20] Refactor MemoryTool integration tests: streamline mock
 responses and add OpenAI tests

---
 test/bot/test_memory_tool_integration.py | 340 +++++++++++++++--------
 1 file changed, 224 insertions(+), 116 deletions(-)

diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py
index b20c32b..3c03df5 100644
--- a/test/bot/test_memory_tool_integration.py
+++ b/test/bot/test_memory_tool_integration.py
@@ -1,27 +1,12 @@
-"""Tests for memory tool end-to-end flow.
-
-Unit tests (mocked API):
-    Verify wiring, tool dispatch, and memory file operations
-  with a mocked Anthropic client. Fast, free, no API key needed.
-
-Integration tests (real API):
-  Hit the actual Anthropic API to verify the full round-trip.
-  Gated behind ``@pytest.mark.anthropic_integration``.
-  Require ``ANTHROPIC_API_KEY`` in .env.
-"""
-
 import json
 import os
-from pathlib import Path
 from unittest.mock import Mock, patch
 
 import pytest
-from dotenv import load_dotenv
-
-load_dotenv()
 
 from microbots import MicroBot, BotRunResult
-from microbots.llm.llm import llm_output_format_str
+from microbots.constants import DOCKER_WORKING_DIR, PermissionLabels
+from microbots.extras.mount import Mount
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
 
 
@@ -29,8 +14,8 @@
 # Helpers
 # ---------------------------------------------------------------------------
 
-def _make_end_turn_response(task_done, thoughts, command=""):
-    """Build a mock Anthropic API response with stop_reason='end_turn'."""
+def _make_anthropic_response(task_done, thoughts, command=""):
+    """Build a mock Anthropic API response."""
     payload = json.dumps({
         "task_done": task_done,
         "thoughts": thoughts,
@@ -47,13 +32,24 @@ def _make_end_turn_response(task_done, thoughts, command=""):
     return resp
 
 
+def _make_openai_response(task_done, thoughts, command=""):
+    """Build a mock OpenAI API response with output_text."""
+    resp = Mock()
+    resp.output_text = json.dumps({
+        "task_done": task_done,
+        "thoughts": thoughts,
+        "command": command,
+    })
+    return resp
+
+
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
 
 @pytest.mark.unit
 class TestMemoryToolWiring:
-    """Unit tests for provider-agnostic MemoryTool dispatch with a mocked Anthropic client."""
+    """Unit tests for MemoryTool dispatch with a mocked Anthropic client."""
 
     @pytest.fixture()
     def memory_dir(self, tmp_path):
@@ -63,9 +59,9 @@ def memory_dir(self, tmp_path):
 
     @pytest.fixture()
     def bot(self, memory_dir):
-        """Create a MicroBot with Anthropic provider and a MemoryTool.
+        """Create a MicroBot with a mocked Anthropic provider and a MemoryTool.
 
-        The Anthropic client is mocked, but the rest of the stack is real:
+        The LLM client is mocked, but the rest of the stack is real:
         tool dispatch and memory file operations.
         """
         tool = MemoryTool(
@@ -109,12 +105,12 @@ def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
         # 1. ask(task) → API returns JSON command (memory create)
         # 2. ask(command output) → API returns end_turn (task_done=True)
         self._mock_client.messages.create.side_effect = [
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=False,
                 thoughts="I'll save a note to memory.",
                 command='memory create /memories/notes.md "Hello from integration test"',
             ),
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=True,
                 thoughts="Saved a note to memory successfully.",
             ),
@@ -144,12 +140,12 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
         (memory_dir / "existing.md").write_text("Previously saved content")
 
         self._mock_client.messages.create.side_effect = [
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=False,
                 thoughts="Let me check my memory.",
                 command="memory view /memories/existing.md",
             ),
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=True,
                 thoughts="Found previously saved content in memory.",
             ),
@@ -181,19 +177,19 @@ def test_create_then_view_memory_file(self, bot, memory_dir):
         """LLM creates a file, then views it — both dispatched via MicroBot loop."""
         self._mock_client.messages.create.side_effect = [
             # Step 1: create file
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=False,
                 thoughts="Creating a todo list.",
                 command='memory create /memories/todo.md "- Fix bug #42\n- Write tests"',
             ),
             # Step 2: view file
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=False,
                 thoughts="Let me verify what I wrote.",
                 command="memory view /memories/todo.md",
             ),
             # Step 3: done
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=True,
                 thoughts="Created and verified the todo list.",
             ),
@@ -218,12 +214,12 @@ def test_create_then_view_memory_file(self, bot, memory_dir):
     def test_non_memory_commands_go_to_environment(self, bot):
         """Regular shell commands should be dispatched to the environment, not the memory tool."""
         self._mock_client.messages.create.side_effect = [
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=False,
                 thoughts="Let me check the files.",
                 command="ls -la",
             ),
-            _make_end_turn_response(
+            _make_anthropic_response(
                 task_done=True,
                 thoughts="Done.",
             ),
@@ -241,11 +237,135 @@ def test_non_memory_commands_go_to_environment(self, bot):
 
 
 # ---------------------------------------------------------------------------
-# Real integration tests — require ANTHROPIC_API_KEY
+# OpenAI wiring tests (mocked)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.unit
+class TestMemoryToolWiringOpenAI:
+    """Unit tests for MemoryTool dispatch with a mocked OpenAI client."""
+
+    @pytest.fixture()
+    def memory_dir(self, tmp_path):
+        d = tmp_path / "memory"
+        d.mkdir()
+        return d
+
+    @pytest.fixture()
+    def bot(self, memory_dir):
+        tool = MemoryTool(
+            memory_dir=str(memory_dir),
+            usage_instructions_to_llm="Use the memory tool to persist notes.",
+        )
+
+        mock_env = Mock()
+        mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="")
+
+        openai_deployment = "gpt-4"
+
+        with patch("microbots.llm.openai_api.OpenAI") as mock_openai_cls, \
+             patch("microbots.llm.openai_api.api_key", "test-key"), \
+             patch("microbots.llm.openai_api.endpoint", "https://api.openai.com"), \
+             patch("microbots.llm.openai_api.deployment_name", openai_deployment):
+
+            bot = MicroBot(
+                model=f"azure-openai/{openai_deployment}",
+                system_prompt="You are a helpful assistant.",
+                additional_tools=[tool],
+                environment=mock_env,
+            )
+
+            self._mock_client = mock_openai_cls.return_value
+            yield bot
+            del bot
+
+    def test_memory_tool_is_retained(self, bot):
+        memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)]
+        assert len(memory_tools) == 1
+
+    def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory create → MicroBot dispatches → file appears on disk."""
+        self._mock_client.responses.create.side_effect = [
+            _make_openai_response(
+                task_done=False,
+                thoughts="I'll save a note to memory.",
+                command='memory create /memories/notes.md "Hello from OpenAI test"',
+            ),
+            _make_openai_response(
+                task_done=True,
+                thoughts="Saved a note to memory successfully.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Save a note saying 'Hello from OpenAI test'",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        assert result.error is None
+
+        created_file = memory_dir / "notes.md"
+        assert created_file.exists()
+        assert created_file.read_text() == "Hello from OpenAI test"
+
+    def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir):
+        """LLM requests a memory view → MicroBot dispatches → file content returned."""
+        (memory_dir / "existing.md").write_text("Previously saved content")
+
+        self._mock_client.responses.create.side_effect = [
+            _make_openai_response(
+                task_done=False,
+                thoughts="Let me check my memory.",
+                command="memory view /memories/existing.md",
+            ),
+            _make_openai_response(
+                task_done=True,
+                thoughts="Found previously saved content in memory.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "Check your memory for existing notes",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+
+    def test_non_memory_commands_go_to_environment(self, bot):
+        """Regular shell commands should be dispatched to the environment."""
+        self._mock_client.responses.create.side_effect = [
+            _make_openai_response(
+                task_done=False,
+                thoughts="Let me check the files.",
+                command="ls -la",
+            ),
+            _make_openai_response(
+                task_done=True,
+                thoughts="Done.",
+            ),
+        ]
+
+        result: BotRunResult = bot.run(
+            "List the files",
+            max_iterations=5,
+            timeout_in_seconds=30,
+        )
+
+        assert result.status is True
+        bot.environment.execute.assert_called_with("ls -la")
+
+
+# ---------------------------------------------------------------------------
+# Real integration tests — require Azure OpenAI + Docker
 # ---------------------------------------------------------------------------
 
-MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool.
-You can save and retrieve notes using the memory tool.
+from microbots.llm.llm import llm_output_format_str
+
+def _memory_system_prompt(sandbox_path):
+    return f"""You are a helpful assistant with access to a shell environment.
+The project code is available at {sandbox_path} in your shell environment.
 All your responses must be in this JSON format:
 {llm_output_format_str}
 The properties (task_done, thoughts, command) are mandatory on each response.
@@ -253,20 +373,59 @@ def test_non_memory_commands_go_to_environment(self, bot):
 """
 
 
-@pytest.mark.anthropic_integration
-@pytest.mark.docker
-class TestMemoryToolRealApi:
-    """End-to-end integration tests that hit the real Anthropic API.
+def _create_test_project(base_dir):
+    """Create a small Python project with deliberate code issues.
 
-    These tests exercise the full MicroBot → AnthropicApi → memory tool
-    pipeline with no mocking.  A real Docker environment is created
-    (matching the AgentBoss integration test pattern).
+    Returns the path to the project directory.
+    """
+    project = base_dir / "myproject"
+    project.mkdir()
+
+    # utils.py — unused import 'os'
+    (project / "utils.py").write_text(
+        "import os\n"
+        "import json\n"
+        "\n"
+        "\n"
+        "def parse_config(raw: str) -> dict:\n"
+        '    """Parse a JSON config string."""\n'
+        "    return json.loads(raw)\n"
+    )
+
+    # handler.py — missing null check around line 42
+    lines = [f"# handler.py — request handler\n"]
+    lines += [f"# line {i}\n" for i in range(2, 42)]
+    lines.append("def handle_request(request):\n")  # line 42
+    lines.append("    user = request.get('user')\n")  # line 43
+    lines.append("    return user.name\n")  # line 44 — will crash if user is None
+    (project / "handler.py").write_text("".join(lines))
+
+    # client.py — deprecated API call around line 88
+    lines = [f"# client.py — HTTP client wrapper\n"]
+    lines += [f"# line {i}\n" for i in range(2, 88)]
+    lines.append("import urllib.request\n")  # line 88
+    lines.append("def fetch(url):\n")  # line 89
+    lines.append("    return urllib.request.urlopen(url).read()  # deprecated: use requests\n")
+    (project / "client.py").write_text("".join(lines))
+
+    return project
+
+
+@pytest.mark.integration
+@pytest.mark.slow
+class TestMemoryToolRealIntegration:
+    """End-to-end integration tests with Azure OpenAI.
+
+    These tests exercise the full MicroBot → LLM → memory tool pipeline
+    with no mocking.  A small Python project with deliberate code issues
+    is mounted into the Docker sandbox so the bot has real files to work on.
 
     Run with::
 
-        pytest -m anthropic_integration
+        pytest -m integration test/bot/test_memory_tool_integration.py -v
 
-    Requires ``ANTHROPIC_API_KEY`` in ``.env``.
+    Requires ``AZURE_OPENAI_DEPLOYMENT_NAME`` (or defaults to
+    ``mini-swe-agent-gpt5``) and valid Azure OpenAI credentials.
     """
 
     @pytest.fixture()
@@ -276,102 +435,51 @@ def memory_dir(self, tmp_path):
         return d
 
     @pytest.fixture()
-    def memory_bot(self, memory_dir):
-        """Create a MicroBot with the real Anthropic API, real Docker env,
-        and a MemoryTool.  No mocking — fully end-to-end.
-        """
+    def test_project(self, tmp_path):
+        return _create_test_project(tmp_path)
+
+    @pytest.fixture()
+    def memory_bot(self, memory_dir, test_project):
         tool = MemoryTool(
             memory_dir=str(memory_dir),
-            usage_instructions_to_llm="Use the memory tool to persist notes.",
         )
 
-        anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5")
+        model = f"azure-openai/{os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'mini-swe-agent-gpt5')}"
+
+        sandbox_path = f"{DOCKER_WORKING_DIR}/{test_project.name}"
+        folder_mount = Mount(
+            str(test_project),
+            sandbox_path,
+            PermissionLabels.READ_ONLY,
+        )
 
         bot = MicroBot(
-            model=f"anthropic/{anthropic_deployment}",
-            system_prompt=MEMORY_SYSTEM_PROMPT,
+            model=model,
+            system_prompt=_memory_system_prompt(sandbox_path),
             additional_tools=[tool],
+            folder_to_mount=folder_mount,
         )
 
         yield bot
         del bot
 
-    def test_memory_tool_is_retained(self, memory_bot):
-        """MemoryTool should remain attached for Anthropic just like other providers."""
-        memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, MemoryTool)]
-        assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached"
-
     def test_create_memory_file(self, memory_bot, memory_dir):
-        """MicroBot should persist a debugging plan to memory.
-
-        The LLM is expected to:
-          1. Receive a task about planning a debugging session.
-          2. Decide to persist the plan using the memory tool.
-          3. Confirm the task is done.
-
-        We verify the plan was actually written to disk.
-        """
+        """MicroBot should use memory tool while reviewing code for issues."""
         result: BotRunResult = memory_bot.run(
             task=(
-                "You are investigating a bug where the server returns HTTP 500 "
-                "on POST /api/users. Create a debugging plan that includes: "
-                "1) check server logs, 2) reproduce the request with curl, "
-                "3) inspect the database connection. "
-                "Persist this plan so you can resume later if interrupted."
+                "Review the Python project for code quality issues. "
+                "Check each .py file for unused imports, missing error handling, "
+                "and deprecated API usage. Report all issues you find."
             ),
-            max_iterations=10,
-            timeout_in_seconds=60,
+            max_iterations=15,
+            timeout_in_seconds=300,
         )
 
         assert result.status is True, f"Task failed: {result.error}"
         assert result.error is None
 
-        # The LLM should have used the memory tool to persist the plan
         saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
         assert len(saved_files) >= 1, (
             f"Expected at least one file created in memory. "
             f"Found: {saved_files}"
         )
-        combined_content = "\n".join(f.read_text() for f in saved_files).lower()
-        assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, (
-            f"Expected debugging plan content in memory files. Content: {combined_content}"
-        )
-
-    def test_create_and_view_roundtrip(self, memory_bot, memory_dir):
-        """MicroBot should save findings and then review them before reporting.
-
-        The LLM is expected to:
-          1. Record analysis findings using the memory tool.
-          2. Review what it recorded to verify nothing was missed.
-          3. Summarize the findings in its final thoughts.
-
-        We verify:
-          - At least one file was written to disk.
-          - The LLM's summary references the recorded findings.
-        """
-        result: BotRunResult = memory_bot.run(
-            task=(
-                "You analyzed a Python project and found these issues: "
-                "1) an unused import 'os' in utils.py, "
-                "2) a missing null check in handler.py line 42. "
-                "Record these findings, then review your notes and "
-                "summarize what you found in your final thoughts."
-            ),
-            max_iterations=15,
-            timeout_in_seconds=60,
-        )
-
-        assert result.status is True, f"Task failed: {result.error}"
-        assert result.error is None
-
-        # The LLM should have created at least one memory file
-        saved_files = [f for f in memory_dir.rglob("*") if f.is_file()]
-        assert len(saved_files) >= 1, (
-            f"Expected at least one file in memory. "
-            f"Found: {list(memory_dir.rglob('*'))}"
-        )
-
-        result_lower = result.result.lower()
-        assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, (
-            f"LLM should have summarized the findings. Got: {result.result}"
-        )

From fc45c0e524c6b83cd59ec9288dec58b948f007c8 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Thu, 12 Mar 2026 10:38:52 +0000
Subject: [PATCH 17/20] Enhance MemoryTool: integrate argparse for command
 parsing and improve subcommand handling

---
 .../tools/tool_definitions/memory_tool.py     | 233 +++++++++---------
 .../tool_definitions/test_memory_tool.py      | 113 ++++-----
 2 files changed, 170 insertions(+), 176 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index c584f30..5f0c7fc 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -1,3 +1,4 @@
+import argparse
 import logging
 import os
 import shlex
@@ -12,6 +13,13 @@
 
 logger = logging.getLogger(" 🧠 MemoryTool")
 
+
+class _NoExitArgumentParser(argparse.ArgumentParser):
+    """ArgumentParser that raises ``ValueError`` instead of calling ``sys.exit``."""
+
+    def error(self, message: str) -> None:  # type: ignore[override]
+        raise ValueError(message)
+
 INSTRUCTIONS_TO_LLM = """
 Use this tool to persist information to files across steps — same interface as
 the Anthropic memory tool.  All paths must be under /memories/.
@@ -101,6 +109,42 @@ def __post_init__(self):
         base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory"
         self._memory_dir = base
         self._memory_dir.mkdir(parents=True, exist_ok=True)
+        self._parser = self._build_parser()
+
+    def _build_parser(self) -> _NoExitArgumentParser:
+        """Build the argparse parser with subparsers for each memory subcommand."""
+        parser = _NoExitArgumentParser(prog="memory", add_help=False)
+        subs = parser.add_subparsers(dest="subcommand")
+
+        p_view = subs.add_parser("view", add_help=False)
+        p_view.add_argument("path")
+        p_view.add_argument("--start", type=int, default=None)
+        p_view.add_argument("--end", type=int, default=None)
+
+        p_create = subs.add_parser("create", add_help=False)
+        p_create.add_argument("path")
+        p_create.add_argument("content", nargs=argparse.REMAINDER)
+
+        p_str = subs.add_parser("str_replace", add_help=False)
+        p_str.add_argument("path")
+        p_str.add_argument("--old", required=True)
+        p_str.add_argument("--new", required=True)
+
+        p_ins = subs.add_parser("insert", add_help=False)
+        p_ins.add_argument("path")
+        p_ins.add_argument("--line", type=int, required=True)
+        p_ins.add_argument("--text", required=True)
+
+        p_del = subs.add_parser("delete", add_help=False)
+        p_del.add_argument("path")
+
+        p_ren = subs.add_parser("rename", add_help=False)
+        p_ren.add_argument("old_path")
+        p_ren.add_argument("new_path")
+
+        subs.add_parser("clear", add_help=False)
+
+        return parser
 
     # ---------------------------------------------------------------------- #
     # Path helpers
@@ -138,7 +182,8 @@ def _resolve(self, path: str) -> Path:
     # ---------------------------------------------------------------------- #
 
     def is_invoked(self, command: str) -> bool:
-        return command.strip().startswith("memory ")
+        cmd = command.strip()
+        return cmd == "memory" or cmd.startswith("memory ")
 
     def invoke(self, command: str, parent_bot) -> CmdReturn:
         try:
@@ -146,30 +191,28 @@ def invoke(self, command: str, parent_bot) -> CmdReturn:
         except ValueError as exc:
             return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1)
 
-        if len(tokens) < 2:
+        try:
+            args = self._parser.parse_args(tokens[1:])  # skip "memory"
+        except ValueError as exc:
+            return CmdReturn(stdout="", stderr=str(exc), return_code=1)
+
+        if args.subcommand is None:
             return CmdReturn(stdout="", stderr="Usage: memory <subcommand> ...", return_code=1)
 
-        sub = tokens[1]
-        args = tokens[2:]
+        dispatch = {
+            "view": self._view,
+            "create": self._create,
+            "str_replace": self._str_replace,
+            "insert": self._insert,
+            "delete": self._delete,
+            "rename": self._rename,
+        }
 
         try:
-            if sub == "view":
-                return self._view(args)
-            elif sub == "create":
-                return self._create(args)
-            elif sub == "str_replace":
-                return self._str_replace(args)
-            elif sub == "insert":
-                return self._insert(args)
-            elif sub == "delete":
-                return self._delete(args)
-            elif sub == "rename":
-                return self._rename(args)
-            elif sub == "clear":
+            if args.subcommand == "clear":
                 return self._clear()
-            else:
-                return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1)
-        except (ValueError, FileNotFoundError, RuntimeError) as exc:
+            return dispatch[args.subcommand](args)
+        except (OSError, ValueError, RuntimeError, UnicodeDecodeError) as exc:
             logger.error("🧠 MemoryTool error: %s", exc)
             return CmdReturn(stdout="", stderr=str(exc), return_code=1)
 
@@ -177,26 +220,10 @@ def invoke(self, command: str, parent_bot) -> CmdReturn:
     # Subcommand handlers
     # ---------------------------------------------------------------------- #
 
-    def _view(self, args: list) -> CmdReturn:
-        if not args:
-            return CmdReturn(stdout="", stderr="Usage: memory view <path> [--start N] [--end N]", return_code=1)
-
-        path = args[0]
-        start_line = None
-        end_line = None
-        i = 1
-        while i < len(args):
-            if args[i] == "--start" and i + 1 < len(args):
-                start_line = int(args[i + 1]); i += 2
-            elif args[i] == "--end" and i + 1 < len(args):
-                end_line = int(args[i + 1]); i += 2
-            else:
-                logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i])
-                i += 1
-
-        resolved = self._resolve(path)
+    def _view(self, args: argparse.Namespace) -> CmdReturn:
+        resolved = self._resolve(args.path)
         if not resolved.exists():
-            return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1)
 
         if resolved.is_dir():
             items = [
@@ -204,13 +231,13 @@ def _view(self, args: list) -> CmdReturn:
                 for item in sorted(resolved.iterdir())
                 if not item.name.startswith(".")
             ]
-            result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items)
+            result = f"Directory: {args.path}\n" + "\n".join(f"- {i}" for i in items)
             return CmdReturn(stdout=result, stderr="", return_code=0)
 
         lines = resolved.read_text(encoding="utf-8").splitlines()
-        if start_line is not None or end_line is not None:
-            s = max(0, (start_line or 1) - 1)
-            e = len(lines) if (end_line is None or end_line == -1) else end_line
+        if args.start is not None or args.end is not None:
+            s = max(0, (args.start or 1) - 1)
+            e = len(lines) if (args.end is None or args.end == -1) else args.end
             lines = lines[s:e]
             base_num = s + 1
         else:
@@ -218,101 +245,65 @@ def _view(self, args: list) -> CmdReturn:
         numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines))
         return CmdReturn(stdout=numbered, stderr="", return_code=0)
 
-    def _create(self, args: list) -> CmdReturn:
-        if len(args) < 2:
+    def _create(self, args: argparse.Namespace) -> CmdReturn:
+        if not args.content:
             return CmdReturn(stdout="", stderr="Usage: memory create <path> <content>", return_code=1)
-        path, content = args[0], args[1]
-        resolved = self._resolve(path)
+        content = " ".join(args.content)
+        resolved = self._resolve(args.path)
         resolved.parent.mkdir(parents=True, exist_ok=True)
         resolved.write_text(content, encoding="utf-8")
-        logger.info("🧠 Memory file created: %s", path)
-        return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0)
-
-    def _str_replace(self, args: list) -> CmdReturn:
-        if not args:
-            return CmdReturn(stdout="", stderr="Usage: memory str_replace <path> --old <text> --new <text>", return_code=1)
-        path = args[0]
-        old_text = new_text = None
-        i = 1
-        while i < len(args):
-            if args[i] == "--old" and i + 1 < len(args):
-                old_text = args[i + 1]; i += 2
-            elif args[i] == "--new" and i + 1 < len(args):
-                new_text = args[i + 1]; i += 2
-            else:
-                logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i])
-                i += 1
-        if old_text is None or new_text is None:
-            return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1)
-        resolved = self._resolve(path)
+        logger.info("🧠 Memory file created: %s", args.path)
+        return CmdReturn(stdout=f"File created: {args.path}", stderr="", return_code=0)
+
+    def _str_replace(self, args: argparse.Namespace) -> CmdReturn:
+        resolved = self._resolve(args.path)
         if not resolved.is_file():
-            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
+            return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1)
         content = resolved.read_text(encoding="utf-8")
-        count = content.count(old_text)
+        count = content.count(args.old)
         if count == 0:
-            return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Text not found in {args.path!r}", return_code=1)
         if count > 1:
-            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1)
-        resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8")
-        return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0)
-
-    def _insert(self, args: list) -> CmdReturn:
-        if not args:
-            return CmdReturn(stdout="", stderr="Usage: memory insert <path> --line N --text <text>", return_code=1)
-        path = args[0]
-        line_num = text = None
-        i = 1
-        while i < len(args):
-            if args[i] == "--line" and i + 1 < len(args):
-                line_num = int(args[i + 1]); i += 2
-            elif args[i] == "--text" and i + 1 < len(args):
-                text = args[i + 1]; i += 2
-            else:
-                logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i])
-                i += 1
-        if line_num is None or text is None:
-            return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1)
-        resolved = self._resolve(path)
+            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} — must be unique", return_code=1)
+        resolved.write_text(content.replace(args.old, args.new, 1), encoding="utf-8")
+        return CmdReturn(stdout=f"File {args.path} edited.", stderr="", return_code=0)
+
+    def _insert(self, args: argparse.Namespace) -> CmdReturn:
+        resolved = self._resolve(args.path)
         if not resolved.is_file():
-            return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1)
-        lines = resolved.read_text(encoding="utf-8").splitlines()
-        if line_num < 0 or line_num > len(lines):
-            return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1)
-        lines.insert(line_num, text.rstrip("\n"))
-        resolved.write_text("\n".join(lines) + "\n", encoding="utf-8")
-        return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0)
-
-    def _delete(self, args: list) -> CmdReturn:
-        if not args:
-            return CmdReturn(stdout="", stderr="Usage: memory delete <path>", return_code=1)
-        path = args[0]
-        if path.rstrip("/") in ("/memories", "memories", ""):
+            return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1)
+        file_lines = resolved.read_text(encoding="utf-8").splitlines()
+        if args.line < 0 or args.line > len(file_lines):
+            return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0–{len(file_lines)}.", return_code=1)
+        file_lines.insert(args.line, args.text.rstrip("\n"))
+        resolved.write_text("\n".join(file_lines) + "\n", encoding="utf-8")
+        return CmdReturn(stdout=f"Text inserted at line {args.line} in {args.path}.", stderr="", return_code=0)
+
+    def _delete(self, args: argparse.Namespace) -> CmdReturn:
+        if args.path.rstrip("/") in ("/memories", "memories", ""):
             return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
-        resolved = self._resolve(path)
+        resolved = self._resolve(args.path)
         if resolved.is_file():
             resolved.unlink()
-            logger.info("🧠 Memory file deleted: %s", path)
-            return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0)
+            logger.info("🧠 Memory file deleted: %s", args.path)
+            return CmdReturn(stdout=f"Deleted: {args.path}", stderr="", return_code=0)
         if resolved.is_dir():
             shutil.rmtree(resolved)
-            logger.info("🧠 Memory directory deleted: %s", path)
-            return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0)
-        return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1)
-
-    def _rename(self, args: list) -> CmdReturn:
-        if len(args) < 2:
-            return CmdReturn(stdout="", stderr="Usage: memory rename <old_path> <new_path>", return_code=1)
-        old_path, new_path = args[0], args[1]
-        old_resolved = self._resolve(old_path)
-        new_resolved = self._resolve(new_path)
+            logger.info("🧠 Memory directory deleted: %s", args.path)
+            return CmdReturn(stdout=f"Deleted directory: {args.path}", stderr="", return_code=0)
+        return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1)
+
+    def _rename(self, args: argparse.Namespace) -> CmdReturn:
+        old_resolved = self._resolve(args.old_path)
+        new_resolved = self._resolve(args.new_path)
         if not old_resolved.exists():
-            return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Source not found: {args.old_path!r}", return_code=1)
         if new_resolved.exists():
-            return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Destination already exists: {args.new_path!r}", return_code=1)
         new_resolved.parent.mkdir(parents=True, exist_ok=True)
         old_resolved.rename(new_resolved)
-        logger.info("🧠 Memory renamed: %s → %s", old_path, new_path)
-        return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0)
+        logger.info("🧠 Memory renamed: %s → %s", args.old_path, args.new_path)
+        return CmdReturn(stdout=f"Renamed {args.old_path} to {args.new_path}.", stderr="", return_code=0)
 
     def _clear(self) -> CmdReturn:
         if self._memory_dir.exists():
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 6345b67..1ff1df6 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -5,6 +5,7 @@
 user's real ~/.microbots/memory directory.
 """
 import pytest
+from argparse import Namespace
 from pathlib import Path
 from unittest.mock import Mock
 
@@ -56,6 +57,12 @@ def test_returns_true_for_memory_commands(self, tmp_path):
         assert tool.is_invoked("memory view /memories") is True
         assert tool.is_invoked("memory create /memories/f.md hello") is True
 
+    def test_returns_true_for_bare_memory(self, tmp_path):
+        tool = make_tool(tmp_path)
+        assert tool.is_invoked("memory") is True
+        assert tool.is_invoked("memory\n") is True
+        assert tool.is_invoked("  memory  ") is True
+
     def test_returns_false_for_other_commands(self, tmp_path):
         tool = make_tool(tmp_path)
         assert tool.is_invoked("ls -la") is False
@@ -134,7 +141,7 @@ def test_view_directory_lists_contents(self, tmp_path):
         (tool._memory_dir / "notes.md").write_text("hello")
         (tool._memory_dir / "sub").mkdir()
 
-        result = tool._view(["/memories"])
+        result = tool._view(Namespace(path="/memories", start=None, end=None))
 
         assert result.return_code == 0
         assert "notes.md" in result.stdout
@@ -144,7 +151,7 @@ def test_view_file_returns_numbered_lines(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n")
 
-        result = tool._view(["/memories/f.md"])
+        result = tool._view(Namespace(path="/memories/f.md", start=None, end=None))
 
         assert result.return_code == 0
         assert "1:" in result.stdout
@@ -155,7 +162,7 @@ def test_view_file_with_line_range(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")
 
-        result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"])
+        result = tool._view(Namespace(path="/memories/f.md", start=2, end=4))
 
         assert result.return_code == 0
         assert "b" in result.stdout
@@ -166,22 +173,21 @@ def test_view_file_with_line_range(self, tmp_path):
     def test_view_nonexistent_path_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
 
-        result = tool._view(["/memories/nonexistent.md"])
+        result = tool._view(Namespace(path="/memories/nonexistent.md", start=None, end=None))
 
         assert result.return_code != 0
 
     def test_view_no_args_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._view([])
+        result = tool.invoke("memory view", parent_bot=Mock())
         assert result.return_code != 0
 
-    def test_view_unknown_flag_is_skipped(self, tmp_path):
-        """else: i += 1 — unrecognised flags are silently skipped."""
+    def test_view_unknown_flag_returns_error(self, tmp_path):
+        """Argparse rejects unrecognised flags."""
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("hello\n")
-        result = tool._view(["/memories/f.md", "--bogus", "value"])
-        assert result.return_code == 0
-        assert "hello" in result.stdout
+        result = tool.invoke("memory view /memories/f.md --bogus value", parent_bot=Mock())
+        assert result.return_code != 0
 
 
 # ---------------------------------------------------------------------------
@@ -194,7 +200,7 @@ class TestMemoryToolCreate:
     def test_create_writes_file(self, tmp_path):
         tool = make_tool(tmp_path)
 
-        result = tool._create(["/memories/notes.md", "hello world"])
+        result = tool._create(Namespace(path="/memories/notes.md", content=["hello world"]))
 
         assert result.return_code == 0
         assert (tool._memory_dir / "notes.md").read_text() == "hello world"
@@ -203,7 +209,7 @@ def test_create_overwrites_existing_file(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("old content")
 
-        result = tool._create(["/memories/f.md", "new content"])
+        result = tool._create(Namespace(path="/memories/f.md", content=["new content"]))
 
         assert result.return_code == 0
         assert (tool._memory_dir / "f.md").read_text() == "new content"
@@ -211,14 +217,14 @@ def test_create_overwrites_existing_file(self, tmp_path):
     def test_create_creates_parent_directories(self, tmp_path):
         tool = make_tool(tmp_path)
 
-        result = tool._create(["/memories/sub/dir/f.md", "content"])
+        result = tool._create(Namespace(path="/memories/sub/dir/f.md", content=["content"]))
 
         assert result.return_code == 0
         assert (tool._memory_dir / "sub" / "dir" / "f.md").exists()
 
     def test_create_missing_args_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._create(["/memories/f.md"])  # missing content
+        result = tool.invoke("memory create /memories/f.md", parent_bot=Mock())  # missing content
         assert result.return_code != 0
 
 
@@ -233,7 +239,7 @@ def test_str_replace_replaces_unique_text(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("hello world")
 
-        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"])
+        result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="goodbye"))
 
         assert result.return_code == 0
         assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
@@ -242,7 +248,7 @@ def test_str_replace_fails_when_text_not_found(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("hello world")
 
-        result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"])
+        result = tool._str_replace(Namespace(path="/memories/f.md", old="nothere", new="x"))
 
         assert result.return_code != 0
         assert "not found" in result.stderr.lower()
@@ -251,7 +257,7 @@ def test_str_replace_fails_when_text_not_unique(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("hello hello")
 
-        result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"])
+        result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="bye"))
 
         assert result.return_code != 0
         assert "2" in result.stderr  # appears N times
@@ -259,30 +265,29 @@ def test_str_replace_fails_when_text_not_unique(self, tmp_path):
     def test_str_replace_missing_flags_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("text")
-        result = tool._str_replace(["/memories/f.md"])
+        result = tool.invoke("memory str_replace /memories/f.md", parent_bot=Mock())
         assert result.return_code != 0
 
     def test_str_replace_empty_args_returns_usage_error(self, tmp_path):
-        """if not args branch: calling _str_replace([]) returns the usage message."""
+        """Calling str_replace with no args returns an error via argparse."""
         tool = make_tool(tmp_path)
-        result = tool._str_replace([])
+        result = tool.invoke("memory str_replace", parent_bot=Mock())
         assert result.return_code == 1
-        assert "Usage: memory str_replace" in result.stderr
 
     def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"])
+        result = tool._str_replace(Namespace(path="/memories/missing.md", old="a", new="b"))
         assert result.return_code != 0
 
-    def test_str_replace_unknown_flag_is_skipped(self, tmp_path):
-        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+    def test_str_replace_unknown_flag_returns_error(self, tmp_path):
+        """Argparse rejects unrecognised flags."""
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("hello world")
-        result = tool._str_replace([
-            "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye"
-        ])
-        assert result.return_code == 0
-        assert (tool._memory_dir / "f.md").read_text() == "goodbye world"
+        result = tool.invoke(
+            'memory str_replace /memories/f.md --bogus ignored --old "hello" --new "goodbye"',
+            parent_bot=Mock(),
+        )
+        assert result.return_code != 0
 
 
 # ---------------------------------------------------------------------------
@@ -296,7 +301,7 @@ def test_insert_prepends_at_line_zero(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\nline2\n")
 
-        result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"])
+        result = tool._insert(Namespace(path="/memories/f.md", line=0, text="prepended"))
 
         assert result.return_code == 0
         lines = (tool._memory_dir / "f.md").read_text().splitlines()
@@ -307,7 +312,7 @@ def test_insert_at_end_of_file(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\nline2\n")
 
-        result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"])
+        result = tool._insert(Namespace(path="/memories/f.md", line=2, text="appended"))
 
         assert result.return_code == 0
         lines = (tool._memory_dir / "f.md").read_text().splitlines()
@@ -317,38 +322,36 @@ def test_insert_invalid_line_number_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\n")
 
-        result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"])
+        result = tool._insert(Namespace(path="/memories/f.md", line=99, text="x"))
 
         assert result.return_code != 0
 
     def test_insert_nonexistent_file_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"])
+        result = tool._insert(Namespace(path="/memories/missing.md", line=0, text="x"))
         assert result.return_code != 0
 
     def test_insert_missing_flags_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\n")
-        result = tool._insert(["/memories/f.md"])
+        result = tool.invoke("memory insert /memories/f.md", parent_bot=Mock())
         assert result.return_code != 0
 
     def test_insert_empty_args_returns_usage_error(self, tmp_path):
-        """if not args branch: calling _insert([]) returns the usage message."""
+        """Calling insert with no args returns an error via argparse."""
         tool = make_tool(tmp_path)
-        result = tool._insert([])
+        result = tool.invoke("memory insert", parent_bot=Mock())
         assert result.return_code == 1
-        assert "Usage: memory insert" in result.stderr
 
-    def test_insert_unknown_flag_is_skipped(self, tmp_path):
-        """else: i += 1 — unrecognised flags in the arg loop are silently skipped."""
+    def test_insert_unknown_flag_returns_error(self, tmp_path):
+        """Argparse rejects unrecognised flags."""
         tool = make_tool(tmp_path)
         (tool._memory_dir / "f.md").write_text("line1\nline2\n")
-        result = tool._insert([
-            "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended"
-        ])
-        assert result.return_code == 0
-        lines = (tool._memory_dir / "f.md").read_text().splitlines()
-        assert lines[0] == "prepended"
+        result = tool.invoke(
+            'memory insert /memories/f.md --bogus ignored --line 0 --text "prepended"',
+            parent_bot=Mock(),
+        )
+        assert result.return_code != 0
 
 
 # ---------------------------------------------------------------------------
@@ -363,7 +366,7 @@ def test_delete_removes_file(self, tmp_path):
         f = tool._memory_dir / "f.md"
         f.write_text("data")
 
-        result = tool._delete(["/memories/f.md"])
+        result = tool._delete(Namespace(path="/memories/f.md"))
 
         assert result.return_code == 0
         assert not f.exists()
@@ -374,7 +377,7 @@ def test_delete_removes_directory(self, tmp_path):
         sub.mkdir()
         (sub / "f.md").write_text("data")
 
-        result = tool._delete(["/memories/sub"])
+        result = tool._delete(Namespace(path="/memories/sub"))
 
         assert result.return_code == 0
         assert not sub.exists()
@@ -382,17 +385,17 @@ def test_delete_removes_directory(self, tmp_path):
     def test_delete_prevents_root_deletion(self, tmp_path):
         tool = make_tool(tmp_path)
         for path in ("/memories", "memories", "/memories/"):
-            result = tool._delete([path])
+            result = tool._delete(Namespace(path=path))
             assert result.return_code != 0
 
     def test_delete_nonexistent_path_raises(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._delete(["/memories/nonexistent.md"])
+        result = tool._delete(Namespace(path="/memories/nonexistent.md"))
         assert result.return_code != 0
 
     def test_delete_no_args_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._delete([])
+        result = tool.invoke("memory delete", parent_bot=Mock())
         assert result.return_code != 0
 
 
@@ -408,7 +411,7 @@ def test_rename_moves_file(self, tmp_path):
         src = tool._memory_dir / "old.md"
         src.write_text("content")
 
-        result = tool._rename(["/memories/old.md", "/memories/new.md"])
+        result = tool._rename(Namespace(old_path="/memories/old.md", new_path="/memories/new.md"))
 
         assert result.return_code == 0
         assert not src.exists()
@@ -416,7 +419,7 @@ def test_rename_moves_file(self, tmp_path):
 
     def test_rename_nonexistent_source_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._rename(["/memories/missing.md", "/memories/new.md"])
+        result = tool._rename(Namespace(old_path="/memories/missing.md", new_path="/memories/new.md"))
         assert result.return_code != 0
 
     def test_rename_fails_if_destination_exists(self, tmp_path):
@@ -424,13 +427,13 @@ def test_rename_fails_if_destination_exists(self, tmp_path):
         (tool._memory_dir / "a.md").write_text("a")
         (tool._memory_dir / "b.md").write_text("b")
 
-        result = tool._rename(["/memories/a.md", "/memories/b.md"])
+        result = tool._rename(Namespace(old_path="/memories/a.md", new_path="/memories/b.md"))
 
         assert result.return_code != 0
 
     def test_rename_missing_args_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
-        result = tool._rename(["/memories/a.md"])
+        result = tool.invoke("memory rename /memories/a.md", parent_bot=Mock())
         assert result.return_code != 0
 
 
@@ -510,7 +513,7 @@ def test_invoke_unknown_subcommand_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)
         result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock())
         assert result.return_code != 0
-        assert "Unknown subcommand" in result.stderr
+        assert "invalid choice" in result.stderr
 
     def test_invoke_too_few_tokens_returns_error(self, tmp_path):
         tool = make_tool(tmp_path)

From 3a9592d3b570ff56b3b8d01dfe9291464fbb1f87 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Fri, 13 Mar 2026 02:00:33 +0000
Subject: [PATCH 18/20] resolve copilot comments

---
 .../tools/tool_definitions/memory_tool.py     | 15 ++--
 .../tool_definitions/test_memory_tool.py      | 76 ++++++++++++++++---
 2 files changed, 74 insertions(+), 17 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 5f0c7fc..599137a 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -158,17 +158,15 @@ def _resolve(self, path: str) -> Path:
         if ".." in Path(stripped).parts:
             raise ValueError(f"Path traversal not allowed: {path!r}")
 
-        if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"):
+        if stripped != "memories" and not stripped.startswith("memories/"):
             raise ValueError(
-                f"Invalid memory path: {path!r}. Use paths under /memories/."
+                f"Invalid memory path: {path!r}. Paths must start with /memories/."
             )
 
         if stripped == "memories":
             rel = ""
-        elif stripped.startswith("memories/"):
-            rel = stripped[len("memories/"):]
         else:
-            rel = stripped  # treat as relative to memory_dir
+            rel = stripped[len("memories/"):]
 
         resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve()
         # Use trailing separator to prevent prefix confusion with sibling dirs
@@ -283,6 +281,8 @@ def _delete(self, args: argparse.Namespace) -> CmdReturn:
         if args.path.rstrip("/") in ("/memories", "memories", ""):
             return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
         resolved = self._resolve(args.path)
+        if resolved == self._memory_dir.resolve():
+            return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
         if resolved.is_file():
             resolved.unlink()
             logger.info("🧠 Memory file deleted: %s", args.path)
@@ -296,6 +296,11 @@ def _delete(self, args: argparse.Namespace) -> CmdReturn:
     def _rename(self, args: argparse.Namespace) -> CmdReturn:
         old_resolved = self._resolve(args.old_path)
         new_resolved = self._resolve(args.new_path)
+        memory_root = self._memory_dir.resolve()
+        if old_resolved == memory_root:
+            return CmdReturn(stdout="", stderr="Cannot rename the /memories root directory", return_code=1)
+        if new_resolved == memory_root:
+            return CmdReturn(stdout="", stderr="Cannot overwrite the /memories root directory", return_code=1)
         if not old_resolved.exists():
             return CmdReturn(stdout="", stderr=f"Source not found: {args.old_path!r}", return_code=1)
         if new_resolved.exists():
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 1ff1df6..e7de107 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -114,19 +114,13 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path):
             with pytest.raises(ValueError):
                 tool._resolve(bad)
 
-    def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path):
-        """The else branch: a path without a /memories/ prefix is resolved
-        relative to memory_dir."""
+    def test_resolve_rejects_bare_relative_paths(self, tmp_path):
+        """Bare relative paths without /memories/ prefix must be rejected
+        to match the documented contract."""
         tool = make_tool(tmp_path)
-        resolved = tool._resolve("notes.md")
-        assert resolved == (tool._memory_dir / "notes.md").resolve()
-
-    def test_resolve_bare_relative_subdir_path(self, tmp_path):
-        """A bare relative path with subdirectory components is also resolved
-        relative to memory_dir (else branch)."""
-        tool = make_tool(tmp_path)
-        resolved = tool._resolve("sub/dir/file.md")
-        assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve()
+        for bad in ("notes.md", "sub/dir/file.md", "file.txt"):
+            with pytest.raises(ValueError, match="Paths must start with /memories/"):
+                tool._resolve(bad)
 
 
 # ---------------------------------------------------------------------------
@@ -388,6 +382,28 @@ def test_delete_prevents_root_deletion(self, tmp_path):
             result = tool._delete(Namespace(path=path))
             assert result.return_code != 0
 
+    @pytest.mark.parametrize("path", [
+        "/memories/.",       # dot resolves to root
+        "/memories/./",      # trailing slash variant
+        "//memories",        # double leading slash
+        "//memories/",       # double leading slash + trailing slash
+        "memories/.",        # relative with dot
+        "memories/./",       # relative with dot + trailing slash
+    ])
+    def test_delete_prevents_root_deletion_normalized_paths(self, tmp_path, path):
+        """Root-equivalent paths that bypass the simple string guard must
+        still be rejected so that the entire memory directory is never
+        wiped out via ``_delete``."""
+        tool = make_tool(tmp_path)
+        # Seed the directory so we can verify it survives
+        (tool._memory_dir / "keep.md").write_text("important")
+
+        result = tool._delete(Namespace(path=path))
+
+        assert result.return_code != 0, f"path {path!r} was not rejected"
+        assert tool._memory_dir.exists(), f"path {path!r} deleted the memory root"
+        assert (tool._memory_dir / "keep.md").exists(), f"path {path!r} wiped memory contents"
+
     def test_delete_nonexistent_path_raises(self, tmp_path):
         tool = make_tool(tmp_path)
         result = tool._delete(Namespace(path="/memories/nonexistent.md"))
@@ -436,6 +452,42 @@ def test_rename_missing_args_returns_error(self, tmp_path):
         result = tool.invoke("memory rename /memories/a.md", parent_bot=Mock())
         assert result.return_code != 0
 
+    @pytest.mark.parametrize("old_path", [
+        "/memories",
+        "/memories/",
+        "/memories/.",
+        "//memories",
+        "memories/.",
+    ])
+    def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path):
+        """Renaming the memory root itself must be rejected."""
+        tool = make_tool(tmp_path)
+        (tool._memory_dir / "keep.md").write_text("important")
+
+        result = tool._rename(Namespace(old_path=old_path, new_path="/memories/newdir"))
+
+        assert result.return_code != 0, f"old_path {old_path!r} was not rejected"
+        assert tool._memory_dir.exists(), f"old_path {old_path!r} moved the memory root"
+        assert (tool._memory_dir / "keep.md").exists()
+
+    @pytest.mark.parametrize("new_path", [
+        "/memories",
+        "/memories/",
+        "/memories/.",
+        "//memories",
+        "memories/.",
+    ])
+    def test_rename_prevents_overwriting_root_as_destination(self, tmp_path, new_path):
+        """Renaming onto the memory root must be rejected."""
+        tool = make_tool(tmp_path)
+        src = tool._memory_dir / "src.md"
+        src.write_text("data")
+
+        result = tool._rename(Namespace(old_path="/memories/src.md", new_path=new_path))
+
+        assert result.return_code != 0, f"new_path {new_path!r} was not rejected"
+        assert src.exists()
+
 
 # ---------------------------------------------------------------------------
 # _clear

From 8904e347cab3e418b3de8ea81050c7cbde913ff4 Mon Sep 17 00:00:00 2001
From: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com>
Date: Fri, 13 Mar 2026 07:47:12 +0530
Subject: [PATCH 19/20] Remove unused import

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 test/tools/tool_definitions/test_memory_tool.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index e7de107..579ca23 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -10,7 +10,6 @@
 from unittest.mock import Mock
 
 from microbots.tools.tool_definitions.memory_tool import MemoryTool
-from microbots.environment.Environment import CmdReturn
 
 
 # ---------------------------------------------------------------------------

From 77578bddeeb45ff6c0c919711382ceba25ec8200 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Fri, 13 Mar 2026 08:27:09 +0000
Subject: [PATCH 20/20] Refactor MemoryTool documentation and validation:
 improve path validation messages and update usage instructions

---
 .../tools/tool_definitions/memory_tool.py     | 27 +++++++------------
 .../tool_definitions/test_memory_tool.py      |  8 ++----
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py
index 599137a..0dd5ec2 100644
--- a/src/microbots/tools/tool_definitions/memory_tool.py
+++ b/src/microbots/tools/tool_definitions/memory_tool.py
@@ -21,8 +21,8 @@ def error(self, message: str) -> None:  # type: ignore[override]
         raise ValueError(message)
 
 INSTRUCTIONS_TO_LLM = """
-Use this tool to persist information to files across steps — same interface as
-the Anthropic memory tool.  All paths must be under /memories/.
+Use this tool to persist information to files across steps.
+All paths must be under /memories/.
 
 MEMORY PROTOCOL:
 1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for
@@ -86,16 +86,6 @@ class MemoryTool(ExternalTool):
     All files are stored under ``memory_dir`` on the host (default
     ``~/.microbots/memory``).  The LLM uses paths like ``/memories/notes.md``
     which are resolved relative to ``memory_dir``.
-
-    Supported subcommands
-    ---------------------
-    memory view <path> [--start N] [--end N]
-    memory create <path> <content>
-    memory str_replace <path> --old <text> --new <text>
-    memory insert <path> --line N --text <text>
-    memory delete <path>
-    memory rename <old> <new>
-    memory clear
     """
 
     name: str = Field(default="memory")
@@ -152,6 +142,11 @@ def _build_parser(self) -> _NoExitArgumentParser:
 
     def _resolve(self, path: str) -> Path:
         """Resolve a /memories/… path to an absolute host path."""
+        if not path.startswith("/"):
+            raise ValueError(
+                f"Invalid memory path: {path!r}. Paths must start with /memories/."
+            )
+
         stripped = path.lstrip("/")
 
         # Reject any path containing '..' components before resolving
@@ -262,9 +257,9 @@ def _str_replace(self, args: argparse.Namespace) -> CmdReturn:
         if count == 0:
             return CmdReturn(stdout="", stderr=f"Text not found in {args.path!r}", return_code=1)
         if count > 1:
-            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} — must be unique", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} - must be unique", return_code=1)
         resolved.write_text(content.replace(args.old, args.new, 1), encoding="utf-8")
-        return CmdReturn(stdout=f"File {args.path} edited.", stderr="", return_code=0)
+        return CmdReturn(stdout=f"File {args.path} has been edited.", stderr="", return_code=0)
 
     def _insert(self, args: argparse.Namespace) -> CmdReturn:
         resolved = self._resolve(args.path)
@@ -272,14 +267,12 @@ def _insert(self, args: argparse.Namespace) -> CmdReturn:
             return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1)
         file_lines = resolved.read_text(encoding="utf-8").splitlines()
         if args.line < 0 or args.line > len(file_lines):
-            return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0–{len(file_lines)}.", return_code=1)
+            return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0 - {len(file_lines)}.", return_code=1)
         file_lines.insert(args.line, args.text.rstrip("\n"))
         resolved.write_text("\n".join(file_lines) + "\n", encoding="utf-8")
         return CmdReturn(stdout=f"Text inserted at line {args.line} in {args.path}.", stderr="", return_code=0)
 
     def _delete(self, args: argparse.Namespace) -> CmdReturn:
-        if args.path.rstrip("/") in ("/memories", "memories", ""):
-            return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
         resolved = self._resolve(args.path)
         if resolved == self._memory_dir.resolve():
             return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1)
diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py
index 579ca23..e47bc49 100644
--- a/test/tools/tool_definitions/test_memory_tool.py
+++ b/test/tools/tool_definitions/test_memory_tool.py
@@ -117,7 +117,7 @@ def test_resolve_rejects_bare_relative_paths(self, tmp_path):
         """Bare relative paths without /memories/ prefix must be rejected
         to match the documented contract."""
         tool = make_tool(tmp_path)
-        for bad in ("notes.md", "sub/dir/file.md", "file.txt"):
+        for bad in ("notes.md", "sub/dir/file.md", "file.txt", "memories/foo.md"):
             with pytest.raises(ValueError, match="Paths must start with /memories/"):
                 tool._resolve(bad)
 
@@ -377,7 +377,7 @@ def test_delete_removes_directory(self, tmp_path):
 
     def test_delete_prevents_root_deletion(self, tmp_path):
         tool = make_tool(tmp_path)
-        for path in ("/memories", "memories", "/memories/"):
+        for path in ("/memories", "/memories/"):
             result = tool._delete(Namespace(path=path))
             assert result.return_code != 0
 
@@ -386,8 +386,6 @@ def test_delete_prevents_root_deletion(self, tmp_path):
         "/memories/./",      # trailing slash variant
         "//memories",        # double leading slash
         "//memories/",       # double leading slash + trailing slash
-        "memories/.",        # relative with dot
-        "memories/./",       # relative with dot + trailing slash
     ])
     def test_delete_prevents_root_deletion_normalized_paths(self, tmp_path, path):
         """Root-equivalent paths that bypass the simple string guard must
@@ -456,7 +454,6 @@ def test_rename_missing_args_returns_error(self, tmp_path):
         "/memories/",
         "/memories/.",
         "//memories",
-        "memories/.",
     ])
     def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path):
         """Renaming the memory root itself must be rejected."""
@@ -474,7 +471,6 @@ def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path):
         "/memories/",
         "/memories/.",
         "//memories",
-        "memories/.",
     ])
     def test_rename_prevents_overwriting_root_as_destination(self, tmp_path, new_path):
         """Renaming onto the memory root must be rejected."""