From 1c9b80920ce01333af12c6cd910fc7ab1bc91c89 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 3 Mar 2026 10:37:45 +0000 Subject: [PATCH 01/10] Implement generic Memory Tool and Anthropic-native memory tool --- src/microbots/MicroBot.py | 41 ++- src/microbots/llm/anthropic_api.py | 123 ++++++- .../tool_definitions/anthropic_memory_tool.py | 184 +++++++++++ .../tools/tool_definitions/memory_tool.py | 312 ++++++++++++++++++ 4 files changed, 647 insertions(+), 13 deletions(-) create mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py create mode 100644 src/microbots/tools/tool_definitions/memory_tool.py diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 3ace5a4..7b061d7 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -317,7 +317,38 @@ def _create_environment(self, folder_to_mount: Optional[Mount]): folder_to_mount=folder_to_mount, ) + def _upgrade_tools_for_provider(self): + """Auto-upgrade provider-agnostic tools to their provider-optimised variants. + + Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when + the provider is Anthropic so the model gets native structured tool-use + instead of the text-command loop. The ``memory_dir`` and any custom + ``usage_instructions_to_llm`` are forwarded to the upgraded instance. + """ + if self.model_provider != ModelProvider.ANTHROPIC: + return + + # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths + from microbots.tools.tool_definitions.memory_tool import MemoryTool + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + upgraded = [] + for tool in self.additional_tools: + if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): + logger.info( + "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider" + ) + upgraded.append(AnthropicMemoryTool( + memory_dir=tool.memory_dir, + usage_instructions=tool.usage_instructions_to_llm, + )) + else: + upgraded.append(tool) + self.additional_tools = upgraded + def _create_llm(self): + self._upgrade_tools_for_provider() + # Append tool usage instructions to system prompt system_prompt_with_tools = self.system_prompt if self.system_prompt else "" if self.additional_tools: @@ -334,8 +365,16 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, model_name=self.deployment_name ) elif self.model_provider == ModelProvider.ANTHROPIC: + # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing: + # any tool that exposes both to_dict() and call() is a native Anthropic tool. + native_tools = [ + t for t in self.additional_tools + if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None)) + ] self.llm = AnthropicApi( - system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name + system_prompt=system_prompt_with_tools, + deployment_name=self.deployment_name, + native_tools=native_tools or None, ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index f40118a..4a73ba5 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -1,7 +1,9 @@ import json import os +import re from dataclasses import asdict from logging import getLogger +from typing import List, Optional from dotenv import load_dotenv from anthropic import Anthropic @@ -16,9 +18,31 @@ api_key = os.getenv("ANTHROPIC_API_KEY") + class AnthropicApi(LLMInterface): - def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3): + def __init__( + self, + system_prompt: str, + deployment_name: str = deployment_name, + max_retries: int = 3, + native_tools: Optional[List] = None, + ): + """ + Parameters + ---------- + system_prompt : str + System prompt for the LLM. + deployment_name : str + The Anthropic model deployment name. + max_retries : int + Maximum number of retries for invalid LLM responses. + native_tools : Optional[List] + Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that + have both a ``to_dict()`` and a ``call()`` method. These are passed + directly to the API and their tool-use blocks are dispatched here + before the JSON response is returned to the caller. + """ self.ai_client = Anthropic( api_key=api_key, base_url=endpoint @@ -26,31 +50,106 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3 self.deployment_name = deployment_name self.system_prompt = system_prompt self.messages = [] + self.native_tools = native_tools or [] + # Cache tool dicts once so _call_api and _dispatch_tool_use don't + # re-serialise on every invocation (important when multiple native + # tools are registered, e.g. memory + bash). + self._native_tool_dicts = [t.to_dict() for t in self.native_tools] + self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)} # Set these values here. This logic will be handled in the parent class. self.max_retries = max_retries self.retries = 0 - def ask(self, message) -> LLMAskResponse: + # ---------------------------------------------------------------------- # + # Internal helpers + # ---------------------------------------------------------------------- # + + def _call_api(self) -> object: + """Call the Anthropic messages API, including native tools when present.""" + kwargs = dict( + model=self.deployment_name, + system=self.system_prompt, + messages=self.messages, + max_tokens=4096, + ) + + if self.native_tools: + kwargs["tools"] = self._native_tool_dicts + + return self.ai_client.messages.create(**kwargs) + + def _dispatch_tool_use(self, response) -> None: + """Handle a tool_use response: execute each tool call and append results. + + Mutates ``self.messages`` in place — appends the assistant turn (with + all content blocks) and the corresponding tool_result user turn. + """ + # Append the full assistant message as-is (content is a list of blocks) + assistant_content = [block.model_dump() for block in response.content] + self.messages.append({"role": "assistant", "content": assistant_content}) + + # Build tool_result entries for every tool_use block + tool_results = [] + for block in response.content: + if block.type != "tool_use": + continue + + # Find the matching native tool by name + tool = self._native_tools_by_name.get(block.name) + if tool is None: + result_text = f"Error: unknown tool '{block.name}'" + logger.error("Received tool_use for unknown tool: %s", block.name) + else: + try: + result_text = tool.call(block.input) + logger.info( + "🧠 Native tool '%s' executed. Result (first 200 chars): %s", + block.name, + str(result_text)[:200], + ) + except Exception as exc: + result_text = f"Error executing tool '{block.name}': {exc}" + logger.error("Native tool '%s' raised: %s", block.name, exc) + + tool_results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": str(result_text), + }) + + self.messages.append({"role": "user", "content": tool_results}) + + # ---------------------------------------------------------------------- # + # Public interface + # ---------------------------------------------------------------------- # + + def ask(self, message: str) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. self.messages.append({"role": "user", "content": message}) valid = False while not valid: - response = self.ai_client.messages.create( - model=self.deployment_name, - system=self.system_prompt, - messages=self.messages, - max_tokens=4096, - ) - - # Extract text content from response - response_text = response.content[0].text if response.content else "" + response = self._call_api() + + # Dispatch any tool_use rounds before looking for a JSON response. + # The model may call the memory tool multiple times before producing + # its final JSON command. + while response.stop_reason == "tool_use": + self._dispatch_tool_use(response) + response = self._call_api() + + # Extract text content from the final response + response_text = "" + for block in response.content: + if block.type == "text": + response_text = block.text + break + logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500]) # Try to extract JSON if wrapped in markdown code blocks - import re json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL) if json_match: response_text = json_match.group(1) diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py new file mode 100644 index 0000000..e80750c --- /dev/null +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -0,0 +1,184 @@ +""" +AnthropicMemoryTool — wraps Anthropic's memory tool. + +The memory tool lets the model persist information across conversations by +reading and writing files in a local memory directory. When the model invokes +the tool, it sends a command (view, create, str_replace, insert, delete, +rename) and the client executes it against a local filesystem directory. + +This implementation extends both: + - ``MemoryTool``: provides all file-operation logic (_resolve, _view, + _create, _str_replace, _insert, _delete, _rename, _clear) and the + ToolAbstract duck-typing interface. + - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and + the ``to_dict()`` / ``call()`` interface required by AnthropicApi. + +The SDK command-handler overrides (view, create, str_replace, insert, delete, +rename) simply translate SDK command objects → arg lists and delegate to the +inherited MemoryTool private methods, converting the CmdReturn back to a +string as the SDK expects. + +The memory tool (type ``memory_20250818``) is available in the standard +Anthropic library and does not require a beta endpoint or header. Pass it +via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular +``client.messages.create(...)`` call. ``AnthropicApi`` handles this +automatically when ``native_tools`` contains an ``AnthropicMemoryTool``. + +Usage: + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + memory = AnthropicMemoryTool() + bot = ReadingBot(..., additional_tools=[memory]) +""" + +from __future__ import annotations + +from logging import getLogger +from pathlib import Path + +from typing_extensions import override + +from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool +from anthropic.types.beta import ( + BetaMemoryTool20250818CreateCommand, + BetaMemoryTool20250818DeleteCommand, + BetaMemoryTool20250818InsertCommand, + BetaMemoryTool20250818RenameCommand, + BetaMemoryTool20250818StrReplaceCommand, + BetaMemoryTool20250818ViewCommand, +) + +from microbots.tools.tool_definitions.memory_tool import MemoryTool + +logger = getLogger(__name__) + +DEFAULT_MEMORY_INSTRUCTIONS = ( + "MEMORY PROTOCOL:\n" + "1. ALWAYS view your memory directory BEFORE doing anything else " + "using the `view` command of your `memory` tool to check for earlier progress.\n" + "2. As you make progress on the task, record status, progress, " + "and key findings in your memory using the memory tool.\n" + "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, " + "so you risk losing any progress that is not recorded in your memory directory.\n" + "4. Before completing a task, always save your final results and analysis to memory.\n" + "5. When editing your memory folder, always keep its content up-to-date, coherent " + "and organized. Rename or delete files that are no longer relevant. " + "Do not create new files unless necessary.\n\n" + "IMPORTANT: The memory tool ONLY works with paths under /memories/. " + "Do NOT use the memory tool to access the repository or workdir. " + "Use shell commands (ls, cat, etc.) for filesystem access." +) + + +class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool): + """ + Anthropic's built-in memory tool, backed by MemoryTool's file logic. + + Inherits file-operation logic from ``MemoryTool`` (plain Python class) and + the SDK's native dispatch interface from ``BetaAbstractMemoryTool``. + + The SDK command-handler overrides delegate to the inherited private methods + (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to + the ``args: list`` format that those methods expect, and converting the + returned ``CmdReturn`` to the string that the SDK API requires. + + Parameters + ---------- + memory_dir : str | Path | None + Root directory for memory files. Defaults to ``~/.microbots/memory``. + usage_instructions : str | None + Custom instructions appended to the system prompt for the LLM. + Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``. + """ + + def __init__( + self, + memory_dir: str | Path | None = None, + usage_instructions: str | None = None, + ) -> None: + MemoryTool.__init__( + self, + memory_dir=str(memory_dir) if memory_dir else None, + usage_instructions_to_llm=( + usage_instructions + if usage_instructions is not None + else DEFAULT_MEMORY_INSTRUCTIONS + ), + ) + _SDKMemoryTool.__init__(self) # type: ignore[call-arg] + + # ---------------------------------------------------------------------- # + # ToolAbstract duck-typing overrides + # ---------------------------------------------------------------------- # + + def is_model_supported(self, model_name: str) -> bool: + """Only Anthropic (Claude) models support the native memory tool.""" + return "claude" in model_name.lower() + + def is_invoked(self, command: str) -> bool: + """Return False — this tool is dispatched natively by AnthropicApi, + not via the shell command loop.""" + return False + + def clear_all(self) -> None: + """Delete all memory files (useful for testing or resetting state).""" + self._clear() + logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir) + + # ---------------------------------------------------------------------- # + # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods + # ---------------------------------------------------------------------- # + + @override + def clear_all_memory(self) -> str: + self.clear_all() + return "All memory cleared" + + @override + def view(self, command: BetaMemoryTool20250818ViewCommand) -> str: + args = [command.path] + if command.view_range: + args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])] + result = self._view(args) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def create(self, command: BetaMemoryTool20250818CreateCommand) -> str: + result = self._create([command.path, command.file_text]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File created successfully at {command.path}" + + @override + def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str: + result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File {command.path} has been edited" + + @override + def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str: + result = self._insert([ + command.path, + "--line", str(command.insert_line), + "--text", command.insert_text, + ]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"Text inserted at line {command.insert_line} in {command.path}" + + @override + def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str: + result = self._delete([command.path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str: + result = self._rename([command.old_path, command.new_path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py new file mode 100644 index 0000000..912fe6d --- /dev/null +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -0,0 +1,312 @@ +import logging +import shlex +import shutil +from pathlib import Path +from typing import Optional + +from pydantic.dataclasses import dataclass, Field + +from microbots.environment.Environment import CmdReturn +from microbots.tools.external_tool import ExternalTool + +logger = logging.getLogger(" 🧠 MemoryTool") + +INSTRUCTIONS_TO_LLM = """ +Use this tool to persist information to files across steps — same interface as +the Anthropic memory tool. All paths must be under /memories/. + +MEMORY PROTOCOL: +1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for + earlier progress. +2. Record status, findings and intermediate results as you go. +3. Before completing a task, save your final results to memory. +4. Keep the memory folder organised — rename or delete stale files. + +## Commands + +View a file or list a directory: + memory view + memory view --start --end + +Create a file: + memory create + +Replace a unique string in a file: + memory str_replace --old "" --new "" + +Insert a line into a file (0 = prepend): + memory insert --line --text "" + +Delete a file or directory: + memory delete + +Rename / move a file: + memory rename + +Clear all memory: + memory clear + +## Examples + + memory view /memories + memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42" + memory str_replace /memories/progress.md --old "line 42" --new "line 45" + memory insert /memories/progress.md --line 0 --text "# Task Notes" + memory view /memories/progress.md --start 1 --end 10 + memory delete /memories/old_notes.md + memory rename /memories/draft.md /memories/final.md + +## Notes +- Paths must start with /memories/. +- memory create overwrites if the file already exists. +- memory str_replace requires the old text to appear exactly once. +""" + + +@dataclass +class MemoryTool(ExternalTool): + """ + File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface + but dispatches through the text command loop (compatible with all providers). + + Subclass of ``ExternalTool`` — all command lists are empty so + ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and + ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``. + + All files are stored under ``memory_dir`` on the host (default + ``~/.microbots/memory``). The LLM uses paths like ``/memories/notes.md`` + which are resolved relative to ``memory_dir``. + + Supported subcommands + --------------------- + memory view [--start N] [--end N] + memory create + memory str_replace --old --new + memory insert --line N --text + memory delete + memory rename + memory clear + """ + + name: str = Field(default="memory") + description: str = Field( + default="File-backed memory store — view, create, edit, delete files under /memories/." + ) + usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM) + memory_dir: Optional[str] = Field(default=None) + + def __post_init__(self): + base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory" + self._memory_dir = base + self._memory_dir.mkdir(parents=True, exist_ok=True) + + def is_model_supported(self, model_name: str) -> bool: + return True + + # ---------------------------------------------------------------------- # + # Path helpers + # ---------------------------------------------------------------------- # + + def _resolve(self, path: str) -> Path: + """Resolve a /memories/… path to an absolute host path.""" + stripped = path.lstrip("/") + if stripped == "memories": + rel = "" + elif stripped.startswith("memories/"): + rel = stripped[len("memories/"):] + elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")): + raise ValueError( + f"Invalid memory path: {path!r}. Use paths under /memories/." + ) + else: + rel = stripped # treat as relative to memory_dir + + resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() + if not str(resolved).startswith(str(self._memory_dir.resolve())): + raise ValueError(f"Path traversal not allowed: {path!r}") + return resolved + + # ---------------------------------------------------------------------- # + # ToolAbstract interface + # ---------------------------------------------------------------------- # + + def is_invoked(self, command: str) -> bool: + return command.strip().startswith("memory ") + + def invoke(self, command: str, parent_bot) -> CmdReturn: + try: + tokens = shlex.split(command) + except ValueError as exc: + return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1) + + if len(tokens) < 2: + return CmdReturn(stdout="", stderr="Usage: memory ...", return_code=1) + + sub = tokens[1] + args = tokens[2:] + + try: + if sub == "view": + return self._view(args) + elif sub == "create": + return self._create(args) + elif sub == "str_replace": + return self._str_replace(args) + elif sub == "insert": + return self._insert(args) + elif sub == "delete": + return self._delete(args) + elif sub == "rename": + return self._rename(args) + elif sub == "clear": + return self._clear() + else: + return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1) + except (ValueError, FileNotFoundError, RuntimeError) as exc: + logger.error("🧠 MemoryTool error: %s", exc) + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + # ---------------------------------------------------------------------- # + # Subcommand handlers + # ---------------------------------------------------------------------- # + + def _view(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory view [--start N] [--end N]", return_code=1) + + path = args[0] + start_line = None + end_line = None + i = 1 + while i < len(args): + if args[i] == "--start" and i + 1 < len(args): + start_line = int(args[i + 1]); i += 2 + elif args[i] == "--end" and i + 1 < len(args): + end_line = int(args[i + 1]); i += 2 + else: + i += 1 + + resolved = self._resolve(path) + if not resolved.exists(): + raise RuntimeError(f"Path not found: {path!r}") + + if resolved.is_dir(): + items = [ + (f"{item.name}/" if item.is_dir() else item.name) + for item in sorted(resolved.iterdir()) + if not item.name.startswith(".") + ] + result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items) + return CmdReturn(stdout=result, stderr="", return_code=0) + + lines = resolved.read_text(encoding="utf-8").splitlines() + if start_line is not None or end_line is not None: + s = max(0, (start_line or 1) - 1) + e = len(lines) if (end_line is None or end_line == -1) else end_line + lines = lines[s:e] + base_num = s + 1 + else: + base_num = 1 + numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines)) + return CmdReturn(stdout=numbered, stderr="", return_code=0) + + def _create(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory create ", return_code=1) + path, content = args[0], args[1] + resolved = self._resolve(path) + resolved.parent.mkdir(parents=True, exist_ok=True) + resolved.write_text(content, encoding="utf-8") + logger.info("🧠 Memory file created: %s", path) + return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0) + + def _str_replace(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory str_replace --old --new ", return_code=1) + path = args[0] + old_text = new_text = None + i = 1 + while i < len(args): + if args[i] == "--old" and i + 1 < len(args): + old_text = args[i + 1]; i += 2 + elif args[i] == "--new" and i + 1 < len(args): + new_text = args[i + 1]; i += 2 + else: + i += 1 + if old_text is None or new_text is None: + return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + raise FileNotFoundError(f"File not found: {path!r}") + content = resolved.read_text(encoding="utf-8") + count = content.count(old_text) + if count == 0: + raise ValueError(f"Text not found in {path!r}") + if count > 1: + raise ValueError(f"Text appears {count} times in {path!r} — must be unique") + resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") + return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) + + def _insert(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory insert --line N --text ", return_code=1) + path = args[0] + line_num = text = None + i = 1 + while i < len(args): + if args[i] == "--line" and i + 1 < len(args): + line_num = int(args[i + 1]); i += 2 + elif args[i] == "--text" and i + 1 < len(args): + text = args[i + 1]; i += 2 + else: + i += 1 + if line_num is None or text is None: + return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + raise FileNotFoundError(f"File not found: {path!r}") + lines = resolved.read_text(encoding="utf-8").splitlines() + if line_num < 0 or line_num > len(lines): + raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.") + lines.insert(line_num, text.rstrip("\n")) + resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") + return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) + + def _delete(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) + path = args[0] + if path.rstrip("/") in ("/memories", "memories", ""): + raise ValueError("Cannot delete the /memories root directory") + resolved = self._resolve(path) + if resolved.is_file(): + resolved.unlink() + logger.info("🧠 Memory file deleted: %s", path) + return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0) + if resolved.is_dir(): + shutil.rmtree(resolved) + logger.info("🧠 Memory directory deleted: %s", path) + return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) + raise FileNotFoundError(f"Path not found: {path!r}") + + def _rename(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory rename ", return_code=1) + old_path, new_path = args[0], args[1] + old_resolved = self._resolve(old_path) + new_resolved = self._resolve(new_path) + if not old_resolved.exists(): + raise FileNotFoundError(f"Source not found: {old_path!r}") + if new_resolved.exists(): + raise ValueError(f"Destination already exists: {new_path!r}") + new_resolved.parent.mkdir(parents=True, exist_ok=True) + old_resolved.rename(new_resolved) + logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) + return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0) + + def _clear(self) -> CmdReturn: + if self._memory_dir.exists(): + shutil.rmtree(self._memory_dir) + self._memory_dir.mkdir(parents=True, exist_ok=True) + logger.info("🧠 Memory cleared.") + return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0) From 315cc03ad359947624f5ebdbc09efed0af094ae4 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 3 Mar 2026 10:51:30 +0000 Subject: [PATCH 02/10] Refactor MemoryTool error handling to return CmdReturn instead of raising exceptions and add tests --- .../tools/tool_definitions/memory_tool.py | 20 +- test/llm/test_anthropic_api.py | 367 ++++++++++++++ .../tool_definitions/test_memory_tool.py | 449 ++++++++++++++++++ 3 files changed, 826 insertions(+), 10 deletions(-) create mode 100644 test/tools/tool_definitions/test_memory_tool.py diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 912fe6d..310f880 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -188,7 +188,7 @@ def _view(self, args: list) -> CmdReturn: resolved = self._resolve(path) if not resolved.exists(): - raise RuntimeError(f"Path not found: {path!r}") + return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) if resolved.is_dir(): items = [ @@ -237,13 +237,13 @@ def _str_replace(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) resolved = self._resolve(path) if not resolved.is_file(): - raise FileNotFoundError(f"File not found: {path!r}") + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) content = resolved.read_text(encoding="utf-8") count = content.count(old_text) if count == 0: - raise ValueError(f"Text not found in {path!r}") + return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1) if count > 1: - raise ValueError(f"Text appears {count} times in {path!r} — must be unique") + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1) resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) @@ -264,10 +264,10 @@ def _insert(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) resolved = self._resolve(path) if not resolved.is_file(): - raise FileNotFoundError(f"File not found: {path!r}") + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) lines = resolved.read_text(encoding="utf-8").splitlines() if line_num < 0 or line_num > len(lines): - raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.") + return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1) lines.insert(line_num, text.rstrip("\n")) resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) @@ -277,7 +277,7 @@ def _delete(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) path = args[0] if path.rstrip("/") in ("/memories", "memories", ""): - raise ValueError("Cannot delete the /memories root directory") + return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) resolved = self._resolve(path) if resolved.is_file(): resolved.unlink() @@ -287,7 +287,7 @@ def _delete(self, args: list) -> CmdReturn: shutil.rmtree(resolved) logger.info("🧠 Memory directory deleted: %s", path) return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) - raise FileNotFoundError(f"Path not found: {path!r}") + return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) def _rename(self, args: list) -> CmdReturn: if len(args) < 2: @@ -296,9 +296,9 @@ def _rename(self, args: list) -> CmdReturn: old_resolved = self._resolve(old_path) new_resolved = self._resolve(new_path) if not old_resolved.exists(): - raise FileNotFoundError(f"Source not found: {old_path!r}") + return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1) if new_resolved.exists(): - raise ValueError(f"Destination already exists: {new_path!r}") + return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1) new_resolved.parent.mkdir(parents=True, exist_ok=True) old_resolved.rename(new_resolved) logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 674294c..49674aa 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -98,7 +98,9 @@ def test_ask_successful_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo 'hello'", @@ -133,7 +135,9 @@ def test_ask_with_task_done_true(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": True, "command": "", @@ -157,12 +161,16 @@ def test_ask_with_retry_on_invalid_response(self): # Mock the Anthropic client to return invalid then valid response mock_invalid_response = Mock() + mock_invalid_response.stop_reason = "end_turn" mock_invalid_content = Mock() + mock_invalid_content.type = "text" mock_invalid_content.text = "invalid json" mock_invalid_response.content = [mock_invalid_content] mock_valid_response = Mock() + mock_valid_response.stop_reason = "end_turn" mock_valid_content = Mock() + mock_valid_content.type = "text" mock_valid_content.text = json.dumps({ "task_done": False, "command": "ls -la", @@ -193,7 +201,9 @@ def test_ask_appends_user_message(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -218,7 +228,9 @@ def test_ask_appends_assistant_response_as_json(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo test", @@ -247,7 +259,9 @@ def test_ask_uses_asdict_for_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" response_dict = { "task_done": True, "command": "", @@ -277,7 +291,9 @@ def test_ask_resets_retries_to_zero(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "ls", @@ -299,7 +315,9 @@ def test_ask_extracts_json_from_markdown(self): # Mock response with markdown-wrapped JSON mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = """Here's the response: ```json { @@ -420,7 +438,9 @@ def test_ask_with_empty_message(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo ''", @@ -443,7 +463,9 @@ def test_multiple_ask_calls_append_messages(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -513,6 +535,351 @@ def test_anthropic_api_clear_history_integration(self): assert len(api.messages) == 0 # Anthropic doesn't store system in messages +# ============================================================================ +# Tests for native_tools support (new changes) +# ============================================================================ + +@pytest.mark.unit +class TestAnthropicApiNativeToolsInit: + """Tests for __init__ native_tools caching.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_init_without_native_tools_has_empty_caches(self): + api = AnthropicApi(system_prompt="test") + + assert api.native_tools == [] + assert api._native_tool_dicts == [] + assert api._native_tools_by_name == {} + + def test_init_with_none_native_tools_has_empty_caches(self): + api = AnthropicApi(system_prompt="test", native_tools=None) + + assert api._native_tool_dicts == [] + assert api._native_tools_by_name == {} + + def test_init_with_single_native_tool_caches_dict(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}] + + def test_init_with_single_native_tool_caches_by_name(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert "memory" in api._native_tools_by_name + assert api._native_tools_by_name["memory"] is tool + + def test_init_with_multiple_native_tools_caches_all(self): + tool1 = Mock() + tool1.to_dict.return_value = {"name": "memory"} + tool2 = Mock() + tool2.to_dict.return_value = {"name": "bash"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + + assert len(api._native_tool_dicts) == 2 + assert api._native_tools_by_name["memory"] is tool1 + assert api._native_tools_by_name["bash"] is tool2 + + def test_init_calls_to_dict_exactly_once_per_tool(self): + """to_dict() must not be called again on subsequent API calls.""" + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + + AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert tool.to_dict.call_count == 1 + + +@pytest.mark.unit +class TestAnthropicApiCallApiWithTools: + """Tests for _call_api including/excluding the tools kwarg.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_call_api_without_tools_omits_tools_kwarg(self): + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" not in call_kwargs + + def test_call_api_with_tools_passes_cached_dicts(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" in call_kwargs + assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}] + + def test_call_api_does_not_call_to_dict_again(self): + """to_dict() should only be called during __init__, never during _call_api.""" + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + count_after_init = tool.to_dict.call_count # should be 1 + api._call_api() + api._call_api() + + assert tool.to_dict.call_count == count_after_init # no increase + + +@pytest.mark.unit +class TestAnthropicApiDispatchToolUse: + """Tests for _dispatch_tool_use.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + # ------------------------------------------------------------------ # + # Helpers + # ------------------------------------------------------------------ # + + @staticmethod + def _tool_use_block(name, tool_id="tu_001", input_data=None): + block = Mock() + block.type = "tool_use" + block.name = name + block.id = tool_id + block.input = input_data or {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name} + return block + + @staticmethod + def _text_block(text="hello"): + block = Mock() + block.type = "text" + block.text = text + block.model_dump.return_value = {"type": "text", "text": text} + return block + + # ------------------------------------------------------------------ # + # Tests + # ------------------------------------------------------------------ # + + def test_dispatch_appends_assistant_message_first(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory")] + api._dispatch_tool_use(response) + + assert api.messages[0]["role"] == "assistant" + + def test_dispatch_appends_tool_result_user_message(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "file listing" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory", tool_id="tu_abc")] + api._dispatch_tool_use(response) + + user_msg = api.messages[1] + assert user_msg["role"] == "user" + assert user_msg["content"][0]["type"] == "tool_result" + assert user_msg["content"][0]["tool_use_id"] == "tu_abc" + assert user_msg["content"][0]["content"] == "file listing" + + def test_dispatch_calls_tool_with_correct_input(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + input_data = {"command": "view", "path": "/memories"} + response = Mock() + response.content = [self._tool_use_block("memory", input_data=input_data)] + api._dispatch_tool_use(response) + + tool.call.assert_called_once_with(input_data) + + def test_dispatch_unknown_tool_returns_error_in_result(self): + api = AnthropicApi(system_prompt="test") # no native tools + + response = Mock() + response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")] + api._dispatch_tool_use(response) + + content = api.messages[1]["content"][0]["content"] + assert "Error" in content + assert "unknown_tool" in content + + def test_dispatch_tool_exception_returns_error_message(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.side_effect = RuntimeError("disk full") + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory", tool_id="tu_exc")] + api._dispatch_tool_use(response) + + content = api.messages[1]["content"][0]["content"] + assert "Error" in content + assert "disk full" in content + + def test_dispatch_skips_non_tool_use_content_blocks(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "result" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [ + self._text_block("thinking..."), + self._tool_use_block("memory", tool_id="tu_only"), + ] + api._dispatch_tool_use(response) + + tool_results = api.messages[1]["content"] + assert len(tool_results) == 1 + assert tool_results[0]["tool_use_id"] == "tu_only" + + def test_dispatch_handles_multiple_tool_use_blocks(self): + tool1 = Mock() + tool1.to_dict.return_value = {"name": "memory"} + tool1.call.return_value = "memory result" + tool2 = Mock() + tool2.to_dict.return_value = {"name": "bash"} + tool2.call.return_value = "bash result" + api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + + response = Mock() + response.content = [ + self._tool_use_block("memory", tool_id="id_1"), + self._tool_use_block("bash", tool_id="id_2"), + ] + api._dispatch_tool_use(response) + + results = api.messages[1]["content"] + assert len(results) == 2 + assert results[0]["tool_use_id"] == "id_1" + assert results[0]["content"] == "memory result" + assert results[1]["tool_use_id"] == "id_2" + assert results[1]["content"] == "bash result" + + +@pytest.mark.unit +class TestAnthropicApiAskWithToolUseLoop: + """Tests for ask() cycling through tool_use rounds before returning JSON.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + @staticmethod + def _tool_use_response(tool_name, tool_id): + block = Mock() + block.type = "tool_use" + block.name = tool_name + block.id = tool_id + block.input = {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} + response = Mock() + response.stop_reason = "tool_use" + response.content = [block] + return response + + @staticmethod + def _text_response(json_dict): + block = Mock() + block.type = "text" + block.text = json.dumps(json_dict) + block.model_dump.return_value = {"type": "text", "text": block.text} + response = Mock() + response.stop_reason = "end_turn" + response.content = [block] + return response + + def test_ask_dispatches_one_tool_use_round_then_returns(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "viewed /memories" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + result = api.ask("do the task") + + assert api.ai_client.messages.create.call_count == 2 + tool.call.assert_called_once() + assert result.command == "ls /" + + def test_ask_dispatches_multiple_tool_use_rounds(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp1 = self._tool_use_response("memory", "tu_1") + tool_resp2 = self._tool_use_response("memory", "tu_2") + final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp]) + + result = api.ask("do the task") + + assert api.ai_client.messages.create.call_count == 3 + assert tool.call.call_count == 2 + assert result.task_done is True + + def test_ask_without_tool_use_does_not_dispatch(self): + api = AnthropicApi(system_prompt="test") + + final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) + api.ai_client.messages.create = Mock(return_value=final_resp) + + result = api.ask("where am I?") + + assert api.ai_client.messages.create.call_count == 1 + assert result.command == "pwd" + + def test_ask_tool_use_messages_are_added_to_history(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "result" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + api.ask("do it") + + # Messages: user, assistant(tool_use), user(tool_result), assistant(final json) + roles = [m["role"] for m in api.messages] + assert roles.count("user") == 2 + assert roles.count("assistant") == 2 + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py new file mode 100644 index 0000000..2c317f6 --- /dev/null +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -0,0 +1,449 @@ +""" +Unit tests for MemoryTool — file-backed memory store. + +All tests use pytest's tmp_path fixture so they are isolated from the +user's real ~/.microbots/memory directory. +""" +import sys +import os +import pytest +from pathlib import Path +from unittest.mock import Mock + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src"))) + +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.environment.Environment import CmdReturn + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_tool(tmp_path: Path) -> MemoryTool: + """Return a MemoryTool whose memory_dir lives under tmp_path.""" + return MemoryTool(memory_dir=str(tmp_path / "memory")) + + +# --------------------------------------------------------------------------- +# Initialisation +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInit: + + def test_memory_dir_is_created_on_init(self, tmp_path): + mem_dir = tmp_path / "memory" + assert not mem_dir.exists() + + make_tool(tmp_path) + + assert mem_dir.exists() + assert mem_dir.is_dir() + + def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): + """When no memory_dir is given it falls back to ~/.microbots/memory.""" + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + tool = MemoryTool() + assert tool._memory_dir == tmp_path / ".microbots" / "memory" + + +# --------------------------------------------------------------------------- +# is_invoked +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsInvoked: + + def test_returns_true_for_memory_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("memory view /memories") is True + assert tool.is_invoked("memory create /memories/f.md hello") is True + + def test_returns_false_for_other_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("ls -la") is False + assert tool.is_invoked("cat file.txt") is False + assert tool.is_invoked("") is False + + def test_strips_leading_whitespace(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked(" memory view /memories") is True + + +# --------------------------------------------------------------------------- +# Path resolution (_resolve) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolResolve: + + def test_resolve_memories_root(self, tmp_path): + tool = make_tool(tmp_path) + assert tool._resolve("/memories") == tool._memory_dir.resolve() + + def test_resolve_memories_subpath(self, tmp_path): + tool = make_tool(tmp_path) + resolved = tool._resolve("/memories/notes.md") + assert resolved == (tool._memory_dir / "notes.md").resolve() + + def test_resolve_rejects_path_traversal(self, tmp_path): + tool = make_tool(tmp_path) + with pytest.raises(ValueError, match="Path traversal"): + tool._resolve("/memories/../../etc/passwd") + + def test_resolve_rejects_non_memory_paths(self, tmp_path): + tool = make_tool(tmp_path) + for bad in ("/workdir/file", "/home/user/file", "/tmp/file"): + with pytest.raises(ValueError): + tool._resolve(bad) + + +# --------------------------------------------------------------------------- +# _view +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolView: + + def test_view_directory_lists_contents(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "notes.md").write_text("hello") + (tool._memory_dir / "sub").mkdir() + + result = tool._view(["/memories"]) + + assert result.return_code == 0 + assert "notes.md" in result.stdout + assert "sub/" in result.stdout + + def test_view_file_returns_numbered_lines(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n") + + result = tool._view(["/memories/f.md"]) + + assert result.return_code == 0 + assert "1:" in result.stdout + assert "line1" in result.stdout + assert "3:" in result.stdout + + def test_view_file_with_line_range(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") + + result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"]) + + assert result.return_code == 0 + assert "b" in result.stdout + assert "d" in result.stdout + assert "a" not in result.stdout + assert "e" not in result.stdout + + def test_view_nonexistent_path_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._view(["/memories/nonexistent.md"]) + + assert result.return_code != 0 + + def test_view_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._view([]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _create +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolCreate: + + def test_create_writes_file(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(["/memories/notes.md", "hello world"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "notes.md").read_text() == "hello world" + + def test_create_overwrites_existing_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("old content") + + result = tool._create(["/memories/f.md", "new content"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "new content" + + def test_create_creates_parent_directories(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(["/memories/sub/dir/f.md", "content"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "sub" / "dir" / "f.md").exists() + + def test_create_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._create(["/memories/f.md"]) # missing content + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _str_replace +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolStrReplace: + + def test_str_replace_replaces_unique_text(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_str_replace_fails_when_text_not_found(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"]) + + assert result.return_code != 0 + assert "not found" in result.stderr.lower() + + def test_str_replace_fails_when_text_not_unique(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello hello") + + result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"]) + + assert result.return_code != 0 + assert "2" in result.stderr # appears N times + + def test_str_replace_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("text") + result = tool._str_replace(["/memories/f.md"]) + assert result.return_code != 0 + + def test_str_replace_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _insert +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInsert: + + def test_insert_prepends_at_line_zero(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"]) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + assert lines[1] == "line1" + + def test_insert_at_end_of_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"]) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[-1] == "appended" + + def test_insert_invalid_line_number_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + + result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"]) + + assert result.return_code != 0 + + def test_insert_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"]) + assert result.return_code != 0 + + def test_insert_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + result = tool._insert(["/memories/f.md"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _delete +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolDelete: + + def test_delete_removes_file(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool._delete(["/memories/f.md"]) + + assert result.return_code == 0 + assert not f.exists() + + def test_delete_removes_directory(self, tmp_path): + tool = make_tool(tmp_path) + sub = tool._memory_dir / "sub" + sub.mkdir() + (sub / "f.md").write_text("data") + + result = tool._delete(["/memories/sub"]) + + assert result.return_code == 0 + assert not sub.exists() + + def test_delete_prevents_root_deletion(self, tmp_path): + tool = make_tool(tmp_path) + for path in ("/memories", "memories", "/memories/"): + result = tool._delete([path]) + assert result.return_code != 0 + + def test_delete_nonexistent_path_raises(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._delete(["/memories/nonexistent.md"]) + assert result.return_code != 0 + + def test_delete_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._delete([]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _rename +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolRename: + + def test_rename_moves_file(self, tmp_path): + tool = make_tool(tmp_path) + src = tool._memory_dir / "old.md" + src.write_text("content") + + result = tool._rename(["/memories/old.md", "/memories/new.md"]) + + assert result.return_code == 0 + assert not src.exists() + assert (tool._memory_dir / "new.md").read_text() == "content" + + def test_rename_nonexistent_source_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._rename(["/memories/missing.md", "/memories/new.md"]) + assert result.return_code != 0 + + def test_rename_fails_if_destination_exists(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._rename(["/memories/a.md", "/memories/b.md"]) + + assert result.return_code != 0 + + def test_rename_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._rename(["/memories/a.md"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _clear +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolClear: + + def test_clear_removes_all_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._clear() + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_clear_leaves_memory_dir_intact(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + tool._clear() + + assert tool._memory_dir.exists() + + +# --------------------------------------------------------------------------- +# invoke — full command dispatch +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInvoke: + + def test_invoke_view_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello") + + result = tool.invoke("memory view /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert "hello" in result.stdout + + def test_invoke_create_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool.invoke('memory create /memories/n.md "some content"', parent_bot=Mock()) + + assert result.return_code == 0 + assert (tool._memory_dir / "n.md").read_text() == "some content" + + def test_invoke_clear_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + result = tool.invoke("memory clear", parent_bot=Mock()) + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_invoke_unknown_subcommand_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock()) + assert result.return_code != 0 + assert "Unknown subcommand" in result.stderr + + def test_invoke_too_few_tokens_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory", parent_bot=Mock()) + assert result.return_code != 0 + + def test_invoke_handles_bad_quoting_gracefully(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke('memory create /memories/f.md "unclosed', parent_bot=Mock()) + assert result.return_code != 0 + assert "Parse error" in result.stderr + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 0260226a00118678c240951d5e66611b4cb8e0a1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 07:48:13 +0000 Subject: [PATCH 03/10] Initial plan From ae8e01ef1941a529f3f4a6cc496ed2cf284e1869 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 07:49:29 +0000 Subject: [PATCH 04/10] Add workflow_dispatch trigger to dockerBuildPush workflow Co-authored-by: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com> --- .github/workflows/dockerBuildPush.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml index 08341d0..ae10709 100644 --- a/.github/workflows/dockerBuildPush.yml +++ b/.github/workflows/dockerBuildPush.yml @@ -1,6 +1,7 @@ name: Build and Push to Docker Hub on: + workflow_dispatch: push: paths: - 'src/microbots/environment/local_docker/image_builder/Dockerfile' From abba9026ae427110e0479f5f329f30cd7b8eea5a Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 08:22:40 +0000 Subject: [PATCH 05/10] Modify comments in AnthropicMemoryTool implementation --- .../tools/tool_definitions/anthropic_memory_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py index e80750c..9cf547e 100644 --- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -8,8 +8,8 @@ This implementation extends both: - ``MemoryTool``: provides all file-operation logic (_resolve, _view, - _create, _str_replace, _insert, _delete, _rename, _clear) and the - ToolAbstract duck-typing interface. + _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies + the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.). - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and the ``to_dict()`` / ``call()`` interface required by AnthropicApi. @@ -108,7 +108,7 @@ def __init__( _SDKMemoryTool.__init__(self) # type: ignore[call-arg] # ---------------------------------------------------------------------- # - # ToolAbstract duck-typing overrides + # ToolAbstract overrides # ---------------------------------------------------------------------- # def is_model_supported(self, model_name: str) -> bool: From 213aa9e70bd249df48a74e1e956930ea14a76656 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 09:01:00 +0000 Subject: [PATCH 06/10] Add unit tests for MemoryTool and AnthropicMemoryTool functionality --- test/bot/test_upgrade_tools_for_provider.py | 161 ++++++++ .../test_anthropic_memory_tool.py | 347 ++++++++++++++++++ .../tool_definitions/test_memory_tool.py | 104 ++++++ 3 files changed, 612 insertions(+) create mode 100644 test/bot/test_upgrade_tools_for_provider.py create mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py new file mode 100644 index 0000000..f5d1e98 --- /dev/null +++ b/test/bot/test_upgrade_tools_for_provider.py @@ -0,0 +1,161 @@ +""" +Unit tests for MicroBot._upgrade_tools_for_provider. + +These tests verify that plain ``MemoryTool`` instances are automatically +replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic, +and that no changes are made for other providers or other tool types. + +All tests bypass the heavy MicroBot constructor (Docker environment, LLM +creation) by constructing an uninitialized instance with ``object.__new__`` +and manually setting only the attributes the method under test needs. +""" +import sys +import os +import logging +import pytest +from unittest.mock import patch, Mock + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) + +from microbots.MicroBot import MicroBot +from microbots.constants import ModelProvider +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _bare_microbot(model_provider: str, tools: list) -> MicroBot: + """Return an uninitialized MicroBot with only the attributes that + ``_upgrade_tools_for_provider`` inspects.""" + bot = object.__new__(MicroBot) + bot.model_provider = model_provider + bot.additional_tools = list(tools) + return bot + + +def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: + return MemoryTool( + memory_dir=str(tmp_path / "memory"), + usage_instructions_to_llm=instructions, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestUpgradeToolsForProvider: + + # -- Anthropic provider: MemoryTool → AnthropicMemoryTool --------------- + + def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + + def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): + mem_dir = str(tmp_path / "my_memory") + tool = MemoryTool(memory_dir=mem_dir) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + upgraded = bot.additional_tools[0] + assert isinstance(upgraded, AnthropicMemoryTool) + assert str(upgraded.memory_dir) == mem_dir + + def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): + custom_instructions = "custom memory instructions for test" + tool = _memory_tool(tmp_path, instructions=custom_instructions) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + upgraded = bot.additional_tools[0] + assert upgraded.usage_instructions_to_llm == custom_instructions + + def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): + existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert bot.additional_tools[0] is existing + + def test_non_memory_tools_are_kept_unchanged(self, tmp_path): + other_tool = Mock() + other_tool.__class__ = Mock # not a MemoryTool subclass + bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert bot.additional_tools[0] is other_tool + + def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): + plain_memory = _memory_tool(tmp_path) + already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) + other_tool = Mock(spec=[]) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 3 + # first: should have been upgraded + assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + assert bot.additional_tools[0] is not plain_memory + # second: already AnthropicMemoryTool, untouched + assert bot.additional_tools[1] is already_upgraded + # third: non-memory tool, untouched + assert bot.additional_tools[2] is other_tool + + def test_empty_tool_list_is_a_no_op(self): + bot = _bare_microbot(ModelProvider.ANTHROPIC, []) + + bot._upgrade_tools_for_provider() + + assert bot.additional_tools == [] + + def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): + tool1 = _memory_tool(tmp_path) + tmp_path2 = tmp_path / "sub" + tmp_path2.mkdir() + tool2 = _memory_tool(tmp_path2) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2]) + + with caplog.at_level(logging.INFO, logger=" MicroBot "): + bot._upgrade_tools_for_provider() + + upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] + assert len(upgrade_logs) == 2 + + # -- Non-Anthropic providers: no upgrade should happen ------------------ + + @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) + def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(provider, [tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert isinstance(bot.additional_tools[0], MemoryTool) + assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool) + + @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) + def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(provider, [tool]) + + bot._upgrade_tools_for_provider() + + assert bot.additional_tools[0] is tool diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py new file mode 100644 index 0000000..6668bfc --- /dev/null +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -0,0 +1,347 @@ +""" +Unit tests for AnthropicMemoryTool. + +Covers: + - __init__: memory_dir / usage_instructions forwarding and defaults + - is_model_supported + - is_invoked + - clear_all / clear_all_memory (SDK override) + - SDK overrides: view, create, str_replace, insert, delete, rename + (happy-path + RuntimeError on failure) +""" +import logging +import pytest + +from anthropic.types.beta import ( + BetaMemoryTool20250818CreateCommand, + BetaMemoryTool20250818DeleteCommand, + BetaMemoryTool20250818InsertCommand, + BetaMemoryTool20250818RenameCommand, + BetaMemoryTool20250818StrReplaceCommand, + BetaMemoryTool20250818ViewCommand, +) + +from microbots.tools.tool_definitions.anthropic_memory_tool import ( + DEFAULT_MEMORY_INSTRUCTIONS, + AnthropicMemoryTool, +) +from microbots.tools.tool_definitions.memory_tool import MemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_tool(tmp_path) -> AnthropicMemoryTool: + return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) + + +# --------------------------------------------------------------------------- +# __init__ +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInit: + + def test_is_subclass_of_memory_tool(self, tmp_path): + assert isinstance(make_tool(tmp_path), MemoryTool) + + def test_memory_dir_is_forwarded(self, tmp_path): + mem_dir = str(tmp_path / "my_memory") + tool = AnthropicMemoryTool(memory_dir=mem_dir) + assert str(tool._memory_dir) == mem_dir + + def test_memory_dir_is_created_on_init(self, tmp_path): + mem_dir = tmp_path / "new_memory" + assert not mem_dir.exists() + AnthropicMemoryTool(memory_dir=str(mem_dir)) + assert mem_dir.exists() + + def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): + from pathlib import Path + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + tool = AnthropicMemoryTool() + assert tool._memory_dir == tmp_path / ".microbots" / "memory" + + def test_custom_usage_instructions_are_stored(self, tmp_path): + custom = "custom instructions" + tool = AnthropicMemoryTool( + memory_dir=str(tmp_path / "memory"), + usage_instructions=custom, + ) + assert tool.usage_instructions_to_llm == custom + + def test_default_usage_instructions_are_applied_when_none(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS + + +# --------------------------------------------------------------------------- +# is_model_supported +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolIsModelSupported: + + def test_returns_true_for_claude_models(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"): + assert tool.is_model_supported(model) is True + + def test_returns_false_for_non_claude_models(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""): + assert tool.is_model_supported(model) is False + + +# --------------------------------------------------------------------------- +# is_invoked +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolIsInvoked: + + def test_always_returns_false(self, tmp_path): + tool = make_tool(tmp_path) + for cmd in ("memory view /memories", "memory clear", "anything", ""): + assert tool.is_invoked(cmd) is False + + +# --------------------------------------------------------------------------- +# clear_all / clear_all_memory +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolClearAll: + + def test_clear_all_removes_all_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + tool.clear_all() + + assert list(tool._memory_dir.iterdir()) == [] + + def test_clear_all_leaves_memory_dir_intact(self, tmp_path): + tool = make_tool(tmp_path) + tool.clear_all() + assert tool._memory_dir.exists() + + def test_clear_all_logs_info(self, tmp_path, caplog): + tool = make_tool(tmp_path) + with caplog.at_level(logging.INFO): + tool.clear_all() + assert "AnthropicMemoryTool" in caplog.text + assert "cleared" in caplog.text + + def test_clear_all_memory_returns_string(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.clear_all_memory() + assert result == "All memory cleared" + + def test_clear_all_memory_removes_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + tool.clear_all_memory() + + assert list(tool._memory_dir.iterdir()) == [] + + +# --------------------------------------------------------------------------- +# view (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolView: + + def test_view_returns_file_contents(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "notes.md").write_text("hello\nworld\n") + + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/notes.md", view_range=None + ) + result = tool.view(cmd) + + assert "hello" in result + assert "world" in result + + def test_view_with_view_range(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") + + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/f.md", view_range=[2, 4] + ) + result = tool.view(cmd) + + assert "b" in result + assert "d" in result + assert "a" not in result + assert "e" not in result + + def test_view_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/nonexistent.md", view_range=None + ) + with pytest.raises(RuntimeError): + tool.view(cmd) + + +# --------------------------------------------------------------------------- +# create (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolCreate: + + def test_create_writes_file(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818CreateCommand( + command="create", path="/memories/new.md", file_text="hello world" + ) + result = tool.create(cmd) + + assert "new.md" in result + assert (tool._memory_dir / "new.md").read_text() == "hello world" + + def test_create_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + # Path traversal should cause _create to fail via _resolve + cmd = BetaMemoryTool20250818CreateCommand( + command="create", path="/memories/../../etc/evil.md", file_text="x" + ) + with pytest.raises((RuntimeError, ValueError)): + tool.create(cmd) + + +# --------------------------------------------------------------------------- +# str_replace (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolStrReplace: + + def test_str_replace_edits_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + cmd = BetaMemoryTool20250818StrReplaceCommand( + command="str_replace", + path="/memories/f.md", + old_str="hello", + new_str="goodbye", + ) + result = tool.str_replace(cmd) + + assert "f.md" in result + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_str_replace_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + cmd = BetaMemoryTool20250818StrReplaceCommand( + command="str_replace", + path="/memories/f.md", + old_str="not present", + new_str="x", + ) + with pytest.raises(RuntimeError): + tool.str_replace(cmd) + + +# --------------------------------------------------------------------------- +# insert (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInsert: + + def test_insert_prepends_line(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + cmd = BetaMemoryTool20250818InsertCommand( + command="insert", + path="/memories/f.md", + insert_line=0, + insert_text="prepended", + ) + result = tool.insert(cmd) + + assert "0" in result or "prepended" in result or "f.md" in result + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + + def test_insert_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818InsertCommand( + command="insert", + path="/memories/missing.md", + insert_line=0, + insert_text="x", + ) + with pytest.raises(RuntimeError): + tool.insert(cmd) + + +# --------------------------------------------------------------------------- +# delete (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolDelete: + + def test_delete_removes_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + cmd = BetaMemoryTool20250818DeleteCommand( + command="delete", path="/memories/f.md" + ) + tool.delete(cmd) + + assert not (tool._memory_dir / "f.md").exists() + + def test_delete_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818DeleteCommand( + command="delete", path="/memories/nonexistent.md" + ) + with pytest.raises(RuntimeError): + tool.delete(cmd) + + +# --------------------------------------------------------------------------- +# rename (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolRename: + + def test_rename_moves_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "old.md").write_text("content") + + cmd = BetaMemoryTool20250818RenameCommand( + command="rename", + old_path="/memories/old.md", + new_path="/memories/new.md", + ) + tool.rename(cmd) + + assert not (tool._memory_dir / "old.md").exists() + assert (tool._memory_dir / "new.md").read_text() == "content" + + def test_rename_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818RenameCommand( + command="rename", + old_path="/memories/missing.md", + new_path="/memories/new.md", + ) + with pytest.raises(RuntimeError): + tool.rename(cmd) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 2c317f6..5679d5e 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -98,6 +98,20 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path): with pytest.raises(ValueError): tool._resolve(bad) + def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path): + """The else branch: a path without a /memories/ prefix is resolved + relative to memory_dir.""" + tool = make_tool(tmp_path) + resolved = tool._resolve("notes.md") + assert resolved == (tool._memory_dir / "notes.md").resolve() + + def test_resolve_bare_relative_subdir_path(self, tmp_path): + """A bare relative path with subdirectory components is also resolved + relative to memory_dir (else branch).""" + tool = make_tool(tmp_path) + resolved = tool._resolve("sub/dir/file.md") + assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve() + # --------------------------------------------------------------------------- # _view @@ -231,6 +245,13 @@ def test_str_replace_missing_flags_returns_error(self, tmp_path): result = tool._str_replace(["/memories/f.md"]) assert result.return_code != 0 + def test_str_replace_empty_args_returns_usage_error(self, tmp_path): + """if not args branch: calling _str_replace([]) returns the usage message.""" + tool = make_tool(tmp_path) + result = tool._str_replace([]) + assert result.return_code == 1 + assert "Usage: memory str_replace" in result.stderr + def test_str_replace_nonexistent_file_returns_error(self, tmp_path): tool = make_tool(tmp_path) result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) @@ -284,6 +305,13 @@ def test_insert_missing_flags_returns_error(self, tmp_path): result = tool._insert(["/memories/f.md"]) assert result.return_code != 0 + def test_insert_empty_args_returns_usage_error(self, tmp_path): + """if not args branch: calling _insert([]) returns the usage message.""" + tool = make_tool(tmp_path) + result = tool._insert([]) + assert result.return_code == 1 + assert "Usage: memory insert" in result.stderr + # --------------------------------------------------------------------------- # _delete @@ -394,6 +422,19 @@ def test_clear_leaves_memory_dir_intact(self, tmp_path): assert tool._memory_dir.exists() +# --------------------------------------------------------------------------- +# is_model_supported +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsModelSupported: + + def test_returns_true_for_any_model(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("gpt-4", "claude-3-sonnet", "ollama/llama3", ""): + assert tool.is_model_supported(model) is True + + # --------------------------------------------------------------------------- # invoke — full command dispatch # --------------------------------------------------------------------------- @@ -444,6 +485,69 @@ def test_invoke_handles_bad_quoting_gracefully(self, tmp_path): assert result.return_code != 0 assert "Parse error" in result.stderr + def test_invoke_str_replace_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool.invoke( + 'memory str_replace /memories/f.md --old "hello" --new "goodbye"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_invoke_insert_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool.invoke( + 'memory insert /memories/f.md --line 0 --text "prepended"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + + def test_invoke_delete_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool.invoke("memory delete /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert not f.exists() + + def test_invoke_rename_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "old.md").write_text("content") + + result = tool.invoke( + "memory rename /memories/old.md /memories/new.md", + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "new.md").read_text() == "content" + assert not (tool._memory_dir / "old.md").exists() + + def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path): + """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand + are caught and returned as a CmdReturn with return_code=1.""" + tool = make_tool(tmp_path) + + # str_replace on a non-existent file raises FileNotFoundError + result = tool.invoke( + 'memory str_replace /memories/missing.md --old "x" --new "y"', + parent_bot=Mock(), + ) + + assert result.return_code == 1 + assert result.stdout == "" + assert result.stderr != "" + if __name__ == "__main__": pytest.main([__file__, "-v"]) From b4d753e938ac4861970427068253df19009f8969 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 09:23:30 +0000 Subject: [PATCH 07/10] Add tests for coverage --- .../test_anthropic_memory_tool.py | 15 +++++-- .../tool_definitions/test_memory_tool.py | 40 ++++++++++++++++--- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index 6668bfc..db7069e 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -209,13 +209,20 @@ def test_create_writes_file(self, tmp_path): assert (tool._memory_dir / "new.md").read_text() == "hello world" def test_create_raises_runtime_error_on_failure(self, tmp_path): + """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by + mocking _create to return a non-zero CmdReturn.""" + from unittest.mock import patch + from microbots.environment.Environment import CmdReturn + tool = make_tool(tmp_path) - # Path traversal should cause _create to fail via _resolve cmd = BetaMemoryTool20250818CreateCommand( - command="create", path="/memories/../../etc/evil.md", file_text="x" + command="create", path="/memories/new.md", file_text="x" ) - with pytest.raises((RuntimeError, ValueError)): - tool.create(cmd) + with patch.object( + tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1) + ): + with pytest.raises(RuntimeError, match="disk full"): + tool.create(cmd) # --------------------------------------------------------------------------- diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 5679d5e..089ca52 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -166,6 +166,14 @@ def test_view_no_args_returns_error(self, tmp_path): result = tool._view([]) assert result.return_code != 0 + def test_view_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello\n") + result = tool._view(["/memories/f.md", "--bogus", "value"]) + assert result.return_code == 0 + assert "hello" in result.stdout + # --------------------------------------------------------------------------- # _create @@ -257,6 +265,16 @@ def test_str_replace_nonexistent_file_returns_error(self, tmp_path): result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) assert result.return_code != 0 + def test_str_replace_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + result = tool._str_replace([ + "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye" + ]) + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + # --------------------------------------------------------------------------- # _insert @@ -312,6 +330,17 @@ def test_insert_empty_args_returns_usage_error(self, tmp_path): assert result.return_code == 1 assert "Usage: memory insert" in result.stderr + def test_insert_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + result = tool._insert([ + "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended" + ]) + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + # --------------------------------------------------------------------------- # _delete @@ -534,19 +563,20 @@ def test_invoke_rename_subcommand(self, tmp_path): assert not (tool._memory_dir / "old.md").exists() def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path): - """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand - are caught and returned as a CmdReturn with return_code=1.""" + """except (ValueError, FileNotFoundError, RuntimeError) block: + a path-traversal path causes _resolve() to raise ValueError inside a + subcommand handler, which is caught and returned as CmdReturn(return_code=1).""" tool = make_tool(tmp_path) - # str_replace on a non-existent file raises FileNotFoundError + # Path traversal triggers ValueError inside _view → caught by except block result = tool.invoke( - 'memory str_replace /memories/missing.md --old "x" --new "y"', + "memory view /memories/../../etc/passwd", parent_bot=Mock(), ) assert result.return_code == 1 assert result.stdout == "" - assert result.stderr != "" + assert "traversal" in result.stderr.lower() or result.stderr != "" if __name__ == "__main__": From 579e6a349355077676ea59211fe7ffc2f7ad6dcd Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 10 Mar 2026 11:54:18 +0000 Subject: [PATCH 08/10] move tool dispatch out of AnthropicApi into MicroBot run loop --- pytest.ini | 1 + src/microbots/MicroBot.py | 39 +- src/microbots/llm/anthropic_api.py | 117 +++-- src/microbots/llm/llm.py | 9 + .../tool_definitions/anthropic_memory_tool.py | 37 +- test/bot/test_memory_tool_integration.py | 439 ++++++++++++++++++ test/bot/test_upgrade_tools_for_provider.py | 110 ++--- test/llm/test_anthropic_api.py | 289 +++++------- .../test_anthropic_memory_tool.py | 14 +- 9 files changed, 710 insertions(+), 345 deletions(-) create mode 100644 test/bot/test_memory_tool_integration.py diff --git a/pytest.ini b/pytest.ini index d0e4f1f..80758ec 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,5 +11,6 @@ addopts = markers = unit: Unit tests integration: Integration tests + anthropic_integration: Integration tests requiring a real Anthropic API key slow: Slow tests docker: marks tests that require a running Docker daemon and pull container images diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 7b061d7..f2708b1 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -317,38 +317,7 @@ def _create_environment(self, folder_to_mount: Optional[Mount]): folder_to_mount=folder_to_mount, ) - def _upgrade_tools_for_provider(self): - """Auto-upgrade provider-agnostic tools to their provider-optimised variants. - - Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when - the provider is Anthropic so the model gets native structured tool-use - instead of the text-command loop. The ``memory_dir`` and any custom - ``usage_instructions_to_llm`` are forwarded to the upgraded instance. - """ - if self.model_provider != ModelProvider.ANTHROPIC: - return - - # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths - from microbots.tools.tool_definitions.memory_tool import MemoryTool - from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool - - upgraded = [] - for tool in self.additional_tools: - if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): - logger.info( - "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider" - ) - upgraded.append(AnthropicMemoryTool( - memory_dir=tool.memory_dir, - usage_instructions=tool.usage_instructions_to_llm, - )) - else: - upgraded.append(tool) - self.additional_tools = upgraded - def _create_llm(self): - self._upgrade_tools_for_provider() - # Append tool usage instructions to system prompt system_prompt_with_tools = self.system_prompt if self.system_prompt else "" if self.additional_tools: @@ -365,16 +334,10 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, model_name=self.deployment_name ) elif self.model_provider == ModelProvider.ANTHROPIC: - # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing: - # any tool that exposes both to_dict() and call() is a native Anthropic tool. - native_tools = [ - t for t in self.additional_tools - if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None)) - ] self.llm = AnthropicApi( system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name, - native_tools=native_tools or None, + additional_tools=self.additional_tools, ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index 4a73ba5..a403f9c 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -21,12 +21,31 @@ class AnthropicApi(LLMInterface): + def upgrade_tools(self, tools: list) -> list: + """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use.""" + from microbots.tools.tool_definitions.memory_tool import MemoryTool + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + upgraded = [] + for tool in tools: + if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): + logger.info( + "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider" + ) + upgraded.append(AnthropicMemoryTool( + memory_dir=tool.memory_dir, + usage_instructions=tool.usage_instructions_to_llm, + )) + else: + upgraded.append(tool) + return upgraded + def __init__( self, system_prompt: str, deployment_name: str = deployment_name, max_retries: int = 3, - native_tools: Optional[List] = None, + additional_tools: Optional[List] = None, ): """ Parameters @@ -37,11 +56,10 @@ def __init__( The Anthropic model deployment name. max_retries : int Maximum number of retries for invalid LLM responses. - native_tools : Optional[List] - Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that - have both a ``to_dict()`` and a ``call()`` method. These are passed - directly to the API and their tool-use blocks are dispatched here - before the JSON response is returned to the caller. + additional_tools : Optional[List] + Tool objects passed from MicroBot. Any provider-agnostic tools + (e.g. ``MemoryTool``) are silently upgraded to their Anthropic- + native variants, and their API schemas are extracted. """ self.ai_client = Anthropic( api_key=api_key, @@ -50,12 +68,18 @@ def __init__( self.deployment_name = deployment_name self.system_prompt = system_prompt self.messages = [] - self.native_tools = native_tools or [] - # Cache tool dicts once so _call_api and _dispatch_tool_use don't - # re-serialise on every invocation (important when multiple native - # tools are registered, e.g. memory + bash). - self._native_tool_dicts = [t.to_dict() for t in self.native_tools] - self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)} + + # Silently upgrade tools in-place and extract API schemas + tools = additional_tools or [] + upgraded = self.upgrade_tools(tools) + # Mutate the original list so the caller (MicroBot) sees upgraded tools + if additional_tools is not None: + additional_tools[:] = upgraded + self._tool_dicts = [ + t.to_dict() for t in upgraded + if callable(getattr(t, "to_dict", None)) + ] + self._pending_tool_response = None # Set these values here. This logic will be handled in the parent class. self.max_retries = max_retries @@ -66,7 +90,7 @@ def __init__( # ---------------------------------------------------------------------- # def _call_api(self) -> object: - """Call the Anthropic messages API, including native tools when present.""" + """Call the Anthropic messages API, including tool definitions when present.""" kwargs = dict( model=self.deployment_name, system=self.system_prompt, @@ -74,44 +98,24 @@ def _call_api(self) -> object: max_tokens=4096, ) - if self.native_tools: - kwargs["tools"] = self._native_tool_dicts + if self._tool_dicts: + kwargs["tools"] = self._tool_dicts return self.ai_client.messages.create(**kwargs) - def _dispatch_tool_use(self, response) -> None: - """Handle a tool_use response: execute each tool call and append results. + def _append_tool_result(self, response, result_text: str) -> None: + """Append the assistant tool_use turn and the corresponding tool_result user turn. - Mutates ``self.messages`` in place — appends the assistant turn (with - all content blocks) and the corresponding tool_result user turn. + Called when the caller provides the tool execution result via + the next ``ask()`` call. """ - # Append the full assistant message as-is (content is a list of blocks) assistant_content = [block.model_dump() for block in response.content] self.messages.append({"role": "assistant", "content": assistant_content}) - # Build tool_result entries for every tool_use block tool_results = [] for block in response.content: if block.type != "tool_use": continue - - # Find the matching native tool by name - tool = self._native_tools_by_name.get(block.name) - if tool is None: - result_text = f"Error: unknown tool '{block.name}'" - logger.error("Received tool_use for unknown tool: %s", block.name) - else: - try: - result_text = tool.call(block.input) - logger.info( - "🧠 Native tool '%s' executed. Result (first 200 chars): %s", - block.name, - str(result_text)[:200], - ) - except Exception as exc: - result_text = f"Error executing tool '{block.name}': {exc}" - logger.error("Native tool '%s' raised: %s", block.name, exc) - tool_results.append({ "type": "tool_result", "tool_use_id": block.id, @@ -127,18 +131,39 @@ def _dispatch_tool_use(self, response) -> None: def ask(self, message: str) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. - self.messages.append({"role": "user", "content": message}) + if self._pending_tool_response: + # Previous response was tool_use — format this message as tool results. + self._append_tool_result(self._pending_tool_response, message) + self._pending_tool_response = None + else: + self.messages.append({"role": "user", "content": message}) valid = False while not valid: response = self._call_api() - # Dispatch any tool_use rounds before looking for a JSON response. - # The model may call the memory tool multiple times before producing - # its final JSON command. - while response.stop_reason == "tool_use": - self._dispatch_tool_use(response) - response = self._call_api() + if response.stop_reason == "tool_use": + # Return tool call info as an LLMAskResponse so the + # caller (MicroBot.run) can dispatch the tool. + self._pending_tool_response = response + + thoughts = "" + for block in response.content: + if block.type == "text": + thoughts = block.text + break + + tool_calls = [] + for block in response.content: + if block.type == "tool_use": + tool_calls.append({ + "name": block.name, + "id": block.id, + "input": block.input, + }) + + command = json.dumps({"native_tool_calls": tool_calls}) + return LLMAskResponse(task_done=False, thoughts=thoughts, command=command) # Extract text content from the final response response_text = "" diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py index 2800790..e7e5c22 100644 --- a/src/microbots/llm/llm.py +++ b/src/microbots/llm/llm.py @@ -29,6 +29,15 @@ def ask(self, message: str) -> LLMAskResponse: def clear_history(self) -> bool: pass + def upgrade_tools(self, tools: list) -> list: + """Upgrade tools for the specific LLM provider. + + The default implementation is a no-op. Subclasses (e.g. + ``AnthropicApi``) override this to swap provider-agnostic tools + with their native equivalents. + """ + return tools + def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]: if self.retries >= self.max_retries: diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py index 9cf547e..ef26182 100644 --- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -21,8 +21,9 @@ The memory tool (type ``memory_20250818``) is available in the standard Anthropic library and does not require a beta endpoint or header. Pass it via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular -``client.messages.create(...)`` call. ``AnthropicApi`` handles this -automatically when ``native_tools`` contains an ``AnthropicMemoryTool``. +``client.messages.create(...)`` call. ``MicroBot`` auto-upgrades +``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and +passes the tool schema to ``AnthropicApi`` via ``tool_dicts``. Usage: from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool @@ -33,6 +34,7 @@ from __future__ import annotations +import json from logging import getLogger from pathlib import Path @@ -48,6 +50,7 @@ BetaMemoryTool20250818ViewCommand, ) +from microbots.environment.Environment import CmdReturn from microbots.tools.tool_definitions.memory_tool import MemoryTool logger = getLogger(__name__) @@ -116,10 +119,36 @@ def is_model_supported(self, model_name: str) -> bool: return "claude" in model_name.lower() def is_invoked(self, command: str) -> bool: - """Return False — this tool is dispatched natively by AnthropicApi, - not via the shell command loop.""" + """Return True when the command is a serialized native_tool_calls JSON + containing a call to the ``memory`` tool.""" + try: + data = json.loads(command) + if "native_tool_calls" in data: + return any(tc["name"] == "memory" for tc in data["native_tool_calls"]) + except (json.JSONDecodeError, KeyError, TypeError): + pass return False + def invoke(self, command: str, parent_bot) -> CmdReturn: + """Execute all memory tool calls in the serialized native_tool_calls batch.""" + data = json.loads(command) + results = [] + for tc in data["native_tool_calls"]: + if tc["name"] != "memory": + continue + try: + result = self.call(tc["input"]) + logger.info( + "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s", + str(result)[:200], + ) + results.append(str(result)) + except Exception as exc: + logger.error("Native tool 'memory' raised: %s", exc) + results.append(f"Error executing tool 'memory': {exc}") + combined = "\n".join(results) + return CmdReturn(stdout=combined, stderr="", return_code=0) + def clear_all(self) -> None: """Delete all memory files (useful for testing or resetting state).""" self._clear() diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py new file mode 100644 index 0000000..25c717d --- /dev/null +++ b/test/bot/test_memory_tool_integration.py @@ -0,0 +1,439 @@ +"""Tests for the Anthropic memory tool end-to-end flow. + +Unit tests (mocked API): + Verify wiring — auto-upgrade, tool dispatch, and memory file operations + with a mocked Anthropic client. Fast, free, no API key needed. + +Integration tests (real API): + Hit the actual Anthropic API to verify the full round-trip. + Gated behind ``@pytest.mark.anthropic_integration``. + Require ``ANTHROPIC_API_KEY`` in .env. +""" + +import json +import os +import sys +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +from dotenv import load_dotenv + +load_dotenv() + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +from microbots import MicroBot, BotRunResult +from microbots.llm.llm import llm_output_format_str +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""): + """Build a mock Anthropic API response with stop_reason='tool_use'.""" + blocks = [] + + if thinking_text: + text_block = Mock() + text_block.type = "text" + text_block.text = thinking_text + blocks.append(text_block) + + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = tool_name + tool_block.id = tool_id + tool_block.input = tool_input + tool_block.model_dump = Mock(return_value={ + "type": "tool_use", + "id": tool_id, + "name": tool_name, + "input": tool_input, + }) + blocks.append(tool_block) + + resp = Mock() + resp.stop_reason = "tool_use" + resp.content = blocks + return resp + + +def _make_end_turn_response(task_done, thoughts, command=""): + """Build a mock Anthropic API response with stop_reason='end_turn'.""" + payload = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + + text_block = Mock() + text_block.type = "text" + text_block.text = payload + + resp = Mock() + resp.stop_reason = "end_turn" + resp.content = [text_block] + return resp + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolWiring: + """Unit tests — mocked Anthropic client, real tool dispatch and file ops.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + """Create a MicroBot with Anthropic provider and a MemoryTool. + + The Anthropic client is mocked, but the rest of the stack is real: + auto-upgrade, tool dispatch, and memory file operations. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + anthropic_deployment = "claude-sonnet-4-5" + + with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \ + patch("microbots.llm.anthropic_api.api_key", "test-key"), \ + patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \ + patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment): + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_anthropic_cls.return_value + yield bot + del bot + + # -- Upgrade verification ----------------------------------------------- + + def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot): + """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool.""" + upgraded_tools = bot.additional_tools + memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_tool_dicts_include_memory_schema(self, bot): + """The LLM should have received the memory tool schema.""" + assert len(bot.llm._tool_dicts) == 1 + assert bot.llm._tool_dicts[0]["type"] == "memory_20250818" + + # -- Create file via tool_use ------------------------------------------- + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + # Sequence: + # 1. ask(task) → API returns tool_use (memory create) + # 2. ask(tool_result) → API returns end_turn (task_done=True) + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_001", + tool_input={ + "command": "create", + "path": "/memories/notes.md", + "file_text": "Hello from integration test", + }, + thinking_text="I'll save a note to memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from integration test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # Verify the file was actually created on disk + # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md + created_file = memory_dir / "notes.md" + assert created_file.exists(), f"Expected {created_file} to be created" + assert created_file.read_text() == "Hello from integration test" + + # -- View file via tool_use --------------------------------------------- + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + # Pre-create a file in memory + # _resolve("/memories/existing.md") → memory_dir/existing.md + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_002", + tool_input={ + "command": "view", + "path": "/memories/existing.md", + }, + thinking_text="Let me check my memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Found previously saved content in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + # Verify the view result was passed back to the API as tool_result + calls = self._mock_client.messages.create.call_args_list + assert len(calls) == 2 + # The second call should have messages including the tool_result + second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", []) + tool_result_msgs = [ + m for m in second_call_messages + if m.get("role") == "user" and isinstance(m.get("content"), list) + and any(c.get("type") == "tool_result" for c in m["content"]) + ] + assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call" + # The tool_result content should contain the file content + tool_result_content = tool_result_msgs[-1]["content"][0]["content"] + assert "Previously saved content" in tool_result_content + + # -- Multiple tool calls in sequence ------------------------------------ + + def test_create_then_view_memory_file(self, bot, memory_dir): + """LLM creates a file, then views it — both dispatched via MicroBot loop.""" + self._mock_client.messages.create.side_effect = [ + # Step 1: create file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_003", + tool_input={ + "command": "create", + "path": "/memories/todo.md", + "file_text": "- Fix bug #42\n- Write tests", + }, + thinking_text="Creating a todo list.", + ), + # Step 2: view file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_004", + tool_input={ + "command": "view", + "path": "/memories/todo.md", + }, + thinking_text="Let me verify what I wrote.", + ), + # Step 3: done + _make_end_turn_response( + task_done=True, + thoughts="Created and verified the todo list.", + ), + ] + + result: BotRunResult = bot.run( + "Create a todo list and verify it was saved", + max_iterations=10, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # File should exist with correct content + created_file = memory_dir / "todo.md" + assert created_file.exists() + assert "Fix bug #42" in created_file.read_text() + + # -- Non-memory commands still go to environment ------------------------ + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment, not the memory tool.""" + self._mock_client.messages.create.side_effect = [ + _make_end_turn_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_end_turn_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + # The environment.execute should have been called with "ls -la" + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# Real integration tests — require ANTHROPIC_API_KEY +# --------------------------------------------------------------------------- + +MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool. +You can save and retrieve notes using the memory tool. +All your responses must be in this JSON format: +{llm_output_format_str} +The properties (task_done, thoughts, command) are mandatory on each response. +When you are done, set task_done to true and command to an empty string. +""" + + +@pytest.mark.anthropic_integration +@pytest.mark.docker +class TestMemoryToolRealApi: + """End-to-end integration tests that hit the real Anthropic API. + + These tests exercise the full MicroBot → AnthropicApi → memory tool + pipeline with no mocking. A real Docker environment is created + (matching the AgentBoss integration test pattern). + + Run with:: + + pytest -m anthropic_integration + + Requires ``ANTHROPIC_API_KEY`` in ``.env``. + """ + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def memory_bot(self, memory_dir): + """Create a MicroBot with the real Anthropic API, real Docker env, + and a MemoryTool. No mocking — fully end-to-end. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5") + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt=MEMORY_SYSTEM_PROMPT, + additional_tools=[tool], + ) + + yield bot + del bot + + def test_memory_tool_auto_upgraded(self, memory_bot): + """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool.""" + memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_create_memory_file(self, memory_bot, memory_dir): + """MicroBot should persist a debugging plan to memory. + + The LLM is expected to: + 1. Receive a task about planning a debugging session. + 2. Decide to persist the plan using the memory tool. + 3. Confirm the task is done. + + We verify the plan was actually written to disk. + """ + result: BotRunResult = memory_bot.run( + task=( + "You are investigating a bug where the server returns HTTP 500 " + "on POST /api/users. Create a debugging plan that includes: " + "1) check server logs, 2) reproduce the request with curl, " + "3) inspect the database connection. " + "Persist this plan so you can resume later if interrupted." + ), + max_iterations=10, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have used the memory tool to persist the plan + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file created in memory. " + f"Found: {saved_files}" + ) + combined_content = "\n".join(f.read_text() for f in saved_files).lower() + assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, ( + f"Expected debugging plan content in memory files. Content: {combined_content}" + ) + + def test_create_and_view_roundtrip(self, memory_bot, memory_dir): + """MicroBot should save findings and then review them before reporting. + + The LLM is expected to: + 1. Record analysis findings using the memory tool. + 2. Review what it recorded to verify nothing was missed. + 3. Summarize the findings in its final thoughts. + + We verify: + - At least one file was written to disk. + - The LLM's summary references the recorded findings. + """ + result: BotRunResult = memory_bot.run( + task=( + "You analyzed a Python project and found these issues: " + "1) an unused import 'os' in utils.py, " + "2) a missing null check in handler.py line 42. " + "Record these findings, then review your notes and " + "summarize what you found in your final thoughts." + ), + max_iterations=15, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have created at least one memory file + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file in memory. " + f"Found: {list(memory_dir.rglob('*'))}" + ) + + result_lower = result.result.lower() + assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, ( + f"LLM should have summarized the findings. Got: {result.result}" + ) diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py index f5d1e98..459afd4 100644 --- a/test/bot/test_upgrade_tools_for_provider.py +++ b/test/bot/test_upgrade_tools_for_provider.py @@ -1,13 +1,7 @@ -""" -Unit tests for MicroBot._upgrade_tools_for_provider. +"""Unit tests for AnthropicApi.upgrade_tools() method. These tests verify that plain ``MemoryTool`` instances are automatically -replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic, -and that no changes are made for other providers or other tool types. - -All tests bypass the heavy MicroBot constructor (Docker environment, LLM -creation) by constructing an uninitialized instance with ``object.__new__`` -and manually setting only the attributes the method under test needs. +replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``. """ import sys import os @@ -17,8 +11,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) -from microbots.MicroBot import MicroBot -from microbots.constants import ModelProvider +from microbots.llm.anthropic_api import AnthropicApi from microbots.tools.tool_definitions.memory_tool import MemoryTool from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool @@ -27,15 +20,6 @@ # Helpers # --------------------------------------------------------------------------- -def _bare_microbot(model_provider: str, tools: list) -> MicroBot: - """Return an uninitialized MicroBot with only the attributes that - ``_upgrade_tools_for_provider`` inspects.""" - bot = object.__new__(MicroBot) - bot.model_provider = model_provider - bot.additional_tools = list(tools) - return bot - - def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: return MemoryTool( memory_dir=str(tmp_path / "memory"), @@ -50,112 +34,84 @@ def _memory_tool(tmp_path, instructions: str = "default instructions") -> Memory @pytest.mark.unit class TestUpgradeToolsForProvider: - # -- Anthropic provider: MemoryTool → AnthropicMemoryTool --------------- + @pytest.fixture(autouse=True) + def _create_api(self): + with patch("microbots.llm.anthropic_api.Anthropic"): + self.api = AnthropicApi(system_prompt="test") + + # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool -------- def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): tool = _memory_tool(tmp_path) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - assert len(bot.additional_tools) == 1 - assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + assert len(upgraded) == 1 + assert isinstance(upgraded[0], AnthropicMemoryTool) def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): mem_dir = str(tmp_path / "my_memory") tool = MemoryTool(memory_dir=mem_dir) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - upgraded = bot.additional_tools[0] - assert isinstance(upgraded, AnthropicMemoryTool) - assert str(upgraded.memory_dir) == mem_dir + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert str(upgraded[0].memory_dir) == mem_dir def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): custom_instructions = "custom memory instructions for test" tool = _memory_tool(tmp_path, instructions=custom_instructions) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - upgraded = bot.additional_tools[0] - assert upgraded.usage_instructions_to_llm == custom_instructions + assert upgraded[0].usage_instructions_to_llm == custom_instructions def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([existing]) - assert len(bot.additional_tools) == 1 - assert bot.additional_tools[0] is existing + assert len(upgraded) == 1 + assert upgraded[0] is existing def test_non_memory_tools_are_kept_unchanged(self, tmp_path): other_tool = Mock() other_tool.__class__ = Mock # not a MemoryTool subclass - bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([other_tool]) - assert len(bot.additional_tools) == 1 - assert bot.additional_tools[0] is other_tool + assert len(upgraded) == 1 + assert upgraded[0] is other_tool def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): plain_memory = _memory_tool(tmp_path) already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) other_tool = Mock(spec=[]) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool]) - assert len(bot.additional_tools) == 3 + assert len(upgraded) == 3 # first: should have been upgraded - assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) - assert bot.additional_tools[0] is not plain_memory + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert upgraded[0] is not plain_memory # second: already AnthropicMemoryTool, untouched - assert bot.additional_tools[1] is already_upgraded + assert upgraded[1] is already_upgraded # third: non-memory tool, untouched - assert bot.additional_tools[2] is other_tool + assert upgraded[2] is other_tool def test_empty_tool_list_is_a_no_op(self): - bot = _bare_microbot(ModelProvider.ANTHROPIC, []) - - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([]) - assert bot.additional_tools == [] + assert upgraded == [] def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): tool1 = _memory_tool(tmp_path) tmp_path2 = tmp_path / "sub" tmp_path2.mkdir() tool2 = _memory_tool(tmp_path2) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2]) - with caplog.at_level(logging.INFO, logger=" MicroBot "): - bot._upgrade_tools_for_provider() + with caplog.at_level(logging.INFO): + self.api.upgrade_tools([tool1, tool2]) upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] assert len(upgrade_logs) == 2 - - # -- Non-Anthropic providers: no upgrade should happen ------------------ - - @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) - def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider): - tool = _memory_tool(tmp_path) - bot = _bare_microbot(provider, [tool]) - - bot._upgrade_tools_for_provider() - - assert len(bot.additional_tools) == 1 - assert isinstance(bot.additional_tools[0], MemoryTool) - assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool) - - @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) - def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider): - tool = _memory_tool(tmp_path) - bot = _bare_microbot(provider, [tool]) - - bot._upgrade_tools_for_provider() - - assert bot.additional_tools[0] is tool diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 49674aa..2c11966 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -540,63 +540,37 @@ def test_anthropic_api_clear_history_integration(self): # ============================================================================ @pytest.mark.unit -class TestAnthropicApiNativeToolsInit: - """Tests for __init__ native_tools caching.""" +class TestAnthropicApiToolDictsInit: + """Tests for __init__ tool upgrade and tool_dicts extraction.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass - def test_init_without_native_tools_has_empty_caches(self): + def test_init_without_additional_tools_has_empty_tool_dicts(self): api = AnthropicApi(system_prompt="test") - assert api.native_tools == [] - assert api._native_tool_dicts == [] - assert api._native_tools_by_name == {} + assert api._tool_dicts == [] - def test_init_with_none_native_tools_has_empty_caches(self): - api = AnthropicApi(system_prompt="test", native_tools=None) + def test_init_with_none_additional_tools_has_empty_tool_dicts(self): + api = AnthropicApi(system_prompt="test", additional_tools=None) - assert api._native_tool_dicts == [] - assert api._native_tools_by_name == {} + assert api._tool_dicts == [] - def test_init_with_single_native_tool_caches_dict(self): + def test_init_with_tool_having_to_dict_extracts_dicts(self): tool = Mock() tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + # Ensure it's not a MemoryTool so upgrade_tools won't touch it + tool.__class__ = Mock - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + api = AnthropicApi(system_prompt="test", additional_tools=[tool]) - assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}] + assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}] - def test_init_with_single_native_tool_caches_by_name(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - assert "memory" in api._native_tools_by_name - assert api._native_tools_by_name["memory"] is tool - - def test_init_with_multiple_native_tools_caches_all(self): - tool1 = Mock() - tool1.to_dict.return_value = {"name": "memory"} - tool2 = Mock() - tool2.to_dict.return_value = {"name": "bash"} - - api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) - - assert len(api._native_tool_dicts) == 2 - assert api._native_tools_by_name["memory"] is tool1 - assert api._native_tools_by_name["bash"] is tool2 - - def test_init_calls_to_dict_exactly_once_per_tool(self): - """to_dict() must not be called again on subsequent API calls.""" - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - - AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_init_sets_pending_tool_response_to_none(self): + api = AnthropicApi(system_prompt="test") - assert tool.to_dict.call_count == 1 + assert api._pending_tool_response is None @pytest.mark.unit @@ -617,10 +591,10 @@ def test_call_api_without_tools_omits_tools_kwarg(self): call_kwargs = api.ai_client.messages.create.call_args[1] assert "tools" not in call_kwargs - def test_call_api_with_tools_passes_cached_dicts(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} - api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + def test_call_api_with_tool_dicts_passes_them(self): + dicts = [{"name": "memory", "type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api._tool_dicts = dicts api.messages = [{"role": "user", "content": "hello"}] api.ai_client.messages.create = Mock(return_value=Mock()) @@ -628,35 +602,17 @@ def test_call_api_with_tools_passes_cached_dicts(self): call_kwargs = api.ai_client.messages.create.call_args[1] assert "tools" in call_kwargs - assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}] - - def test_call_api_does_not_call_to_dict_again(self): - """to_dict() should only be called during __init__, never during _call_api.""" - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) - api.messages = [{"role": "user", "content": "hello"}] - api.ai_client.messages.create = Mock(return_value=Mock()) - - count_after_init = tool.to_dict.call_count # should be 1 - api._call_api() - api._call_api() - - assert tool.to_dict.call_count == count_after_init # no increase + assert call_kwargs["tools"] == dicts @pytest.mark.unit -class TestAnthropicApiDispatchToolUse: - """Tests for _dispatch_tool_use.""" +class TestAnthropicApiAppendToolResult: + """Tests for _append_tool_result.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass - # ------------------------------------------------------------------ # - # Helpers - # ------------------------------------------------------------------ # - @staticmethod def _tool_use_block(name, tool_id="tu_001", input_data=None): block = Mock() @@ -675,31 +631,21 @@ def _text_block(text="hello"): block.model_dump.return_value = {"type": "text", "text": text} return block - # ------------------------------------------------------------------ # - # Tests - # ------------------------------------------------------------------ # - - def test_dispatch_appends_assistant_message_first(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_appends_assistant_message_first(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [self._tool_use_block("memory")] - api._dispatch_tool_use(response) + api._append_tool_result(response, "ok") assert api.messages[0]["role"] == "assistant" - def test_dispatch_appends_tool_result_user_message(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "file listing" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_appends_tool_result_user_message(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [self._tool_use_block("memory", tool_id="tu_abc")] - api._dispatch_tool_use(response) + api._append_tool_result(response, "file listing") user_msg = api.messages[1] assert user_msg["role"] == "user" @@ -707,100 +653,53 @@ def test_dispatch_appends_tool_result_user_message(self): assert user_msg["content"][0]["tool_use_id"] == "tu_abc" assert user_msg["content"][0]["content"] == "file listing" - def test_dispatch_calls_tool_with_correct_input(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - input_data = {"command": "view", "path": "/memories"} - response = Mock() - response.content = [self._tool_use_block("memory", input_data=input_data)] - api._dispatch_tool_use(response) - - tool.call.assert_called_once_with(input_data) - - def test_dispatch_unknown_tool_returns_error_in_result(self): - api = AnthropicApi(system_prompt="test") # no native tools - - response = Mock() - response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")] - api._dispatch_tool_use(response) - - content = api.messages[1]["content"][0]["content"] - assert "Error" in content - assert "unknown_tool" in content - - def test_dispatch_tool_exception_returns_error_message(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.side_effect = RuntimeError("disk full") - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - response = Mock() - response.content = [self._tool_use_block("memory", tool_id="tu_exc")] - api._dispatch_tool_use(response) - - content = api.messages[1]["content"][0]["content"] - assert "Error" in content - assert "disk full" in content - - def test_dispatch_skips_non_tool_use_content_blocks(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "result" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_skips_non_tool_use_content_blocks(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [ self._text_block("thinking..."), self._tool_use_block("memory", tool_id="tu_only"), ] - api._dispatch_tool_use(response) + api._append_tool_result(response, "result") tool_results = api.messages[1]["content"] assert len(tool_results) == 1 assert tool_results[0]["tool_use_id"] == "tu_only" - def test_dispatch_handles_multiple_tool_use_blocks(self): - tool1 = Mock() - tool1.to_dict.return_value = {"name": "memory"} - tool1.call.return_value = "memory result" - tool2 = Mock() - tool2.to_dict.return_value = {"name": "bash"} - tool2.call.return_value = "bash result" - api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + def test_handles_multiple_tool_use_blocks(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [ self._tool_use_block("memory", tool_id="id_1"), self._tool_use_block("bash", tool_id="id_2"), ] - api._dispatch_tool_use(response) + api._append_tool_result(response, "combined result") results = api.messages[1]["content"] assert len(results) == 2 assert results[0]["tool_use_id"] == "id_1" - assert results[0]["content"] == "memory result" + assert results[0]["content"] == "combined result" assert results[1]["tool_use_id"] == "id_2" - assert results[1]["content"] == "bash result" + assert results[1]["content"] == "combined result" @pytest.mark.unit -class TestAnthropicApiAskWithToolUseLoop: - """Tests for ask() cycling through tool_use rounds before returning JSON.""" +class TestAnthropicApiAskWithToolUse: + """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass @staticmethod - def _tool_use_response(tool_name, tool_id): + def _tool_use_response(tool_name, tool_id, input_data=None): block = Mock() block.type = "tool_use" block.name = tool_name block.id = tool_id - block.input = {} + block.input = input_data or {} block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} response = Mock() response.stop_reason = "tool_use" @@ -818,40 +717,61 @@ def _text_response(json_dict): response.content = [block] return response - def test_ask_dispatches_one_tool_use_round_then_returns(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "viewed /memories" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_returns_tool_use_as_ask_response(self): + dicts = [{"name": "memory", "type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test") + api._tool_dicts = dicts - tool_resp = self._tool_use_response("memory", "tu_1") - final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"}) + api.ai_client.messages.create = Mock(return_value=tool_resp) result = api.ask("do the task") - assert api.ai_client.messages.create.call_count == 2 - tool.call.assert_called_once() - assert result.command == "ls /" + assert result.task_done is False + assert '"native_tool_calls"' in result.command + parsed = json.loads(result.command) + assert parsed["native_tool_calls"][0]["name"] == "memory" + assert parsed["native_tool_calls"][0]["id"] == "tu_1" + assert api._pending_tool_response is tool_resp - def test_ask_dispatches_multiple_tool_use_rounds(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_stores_pending_tool_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] - tool_resp1 = self._tool_use_response("memory", "tu_1") - tool_resp2 = self._tool_use_response("memory", "tu_2") - final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp]) + tool_resp = self._tool_use_response("memory", "tu_1") + api.ai_client.messages.create = Mock(return_value=tool_resp) - result = api.ask("do the task") + api.ask("do it") - assert api.ai_client.messages.create.call_count == 3 - assert tool.call.call_count == 2 - assert result.task_done is True + assert api._pending_tool_response is tool_resp - def test_ask_without_tool_use_does_not_dispatch(self): + def test_ask_with_pending_tool_response_formats_tool_result(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + # First ask — returns tool_use + api.ask("do the task") + + # Second ask — sends tool result, formats as tool_result + result = api.ask("viewed /memories") + + assert result.command == "ls /" + assert api._pending_tool_response is None + + # Check messages contain the tool_result + tool_result_msgs = [ + m for m in api.messages + if m["role"] == "user" and isinstance(m["content"], list) + ] + assert len(tool_result_msgs) == 1 + assert tool_result_msgs[0]["content"][0]["type"] == "tool_result" + assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1" + + def test_ask_without_tool_use_works_normally(self): api = AnthropicApi(system_prompt="test") final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) @@ -861,23 +781,34 @@ def test_ask_without_tool_use_does_not_dispatch(self): assert api.ai_client.messages.create.call_count == 1 assert result.command == "pwd" + assert api._pending_tool_response is None - def test_ask_tool_use_messages_are_added_to_history(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "result" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_extracts_thoughts_from_tool_use_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] - tool_resp = self._tool_use_response("memory", "tu_1") - final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + # Build a tool_use response with a text block for thoughts + text_block = Mock() + text_block.type = "text" + text_block.text = "Let me check memory first" + text_block.model_dump.return_value = {"type": "text", "text": text_block.text} - api.ask("do it") + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "memory" + tool_block.id = "tu_1" + tool_block.input = {} + tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"} + + response = Mock() + response.stop_reason = "tool_use" + response.content = [text_block, tool_block] + + api.ai_client.messages.create = Mock(return_value=response) + + result = api.ask("do the task") - # Messages: user, assistant(tool_use), user(tool_result), assistant(final json) - roles = [m["role"] for m in api.messages] - assert roles.count("user") == 2 - assert roles.count("assistant") == 2 + assert result.thoughts == "Let me check memory first" if __name__ == "__main__": diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index db7069e..ab27a68 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -101,7 +101,19 @@ def test_returns_false_for_non_claude_models(self, tmp_path): @pytest.mark.unit class TestAnthropicMemoryToolIsInvoked: - def test_always_returns_false(self, tmp_path): + def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path): + tool = make_tool(tmp_path) + import json + cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]}) + assert tool.is_invoked(cmd) is True + + def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path): + tool = make_tool(tmp_path) + import json + cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]}) + assert tool.is_invoked(cmd) is False + + def test_returns_false_for_plain_commands(self, tmp_path): tool = make_tool(tmp_path) for cmd in ("memory view /memories", "memory clear", "anything", ""): assert tool.is_invoked(cmd) is False From c7d2657a448580e706c7e9d4b203e4ae19ccb056 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 06:15:00 +0000 Subject: [PATCH 09/10] enhance MemoryTool path validation and logging --- .../tools/tool_definitions/__init__.py | 2 + .../tools/tool_definitions/memory_tool.py | 17 ++++-- test/llm/test_llm.py | 30 +++++++++- .../test_anthropic_memory_tool.py | 55 +++++++++++++++++++ 4 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 src/microbots/tools/tool_definitions/__init__.py diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py new file mode 100644 index 0000000..88acf0d --- /dev/null +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -0,0 +1,2 @@ +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool \ No newline at end of file diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 310f880..b63f3ff 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -1,4 +1,5 @@ import logging +import os import shlex import shutil from pathlib import Path @@ -110,19 +111,22 @@ def is_model_supported(self, model_name: str) -> bool: def _resolve(self, path: str) -> Path: """Resolve a /memories/… path to an absolute host path.""" stripped = path.lstrip("/") + + # Reject any path containing '..' components before resolving + if ".." in Path(stripped).parts: + raise ValueError(f"Path traversal not allowed: {path!r}") + if stripped == "memories": rel = "" elif stripped.startswith("memories/"): rel = stripped[len("memories/"):] - elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")): - raise ValueError( - f"Invalid memory path: {path!r}. Use paths under /memories/." - ) else: rel = stripped # treat as relative to memory_dir resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() - if not str(resolved).startswith(str(self._memory_dir.resolve())): + # Use trailing separator to prevent prefix confusion with sibling dirs + memory_root = str(self._memory_dir.resolve()) + if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep): raise ValueError(f"Path traversal not allowed: {path!r}") return resolved @@ -184,6 +188,7 @@ def _view(self, args: list) -> CmdReturn: elif args[i] == "--end" and i + 1 < len(args): end_line = int(args[i + 1]); i += 2 else: + logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i]) i += 1 resolved = self._resolve(path) @@ -232,6 +237,7 @@ def _str_replace(self, args: list) -> CmdReturn: elif args[i] == "--new" and i + 1 < len(args): new_text = args[i + 1]; i += 2 else: + logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i]) i += 1 if old_text is None or new_text is None: return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) @@ -259,6 +265,7 @@ def _insert(self, args: list) -> CmdReturn: elif args[i] == "--text" and i + 1 < len(args): text = args[i + 1]; i += 2 else: + logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i]) i += 1 if line_num is None or text is None: return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index bd1b82d..af5a7d2 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -742,4 +742,32 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert llm_response is None assert llm.retries == 1 assert len(llm.messages) == 1 - assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"] \ No newline at end of file + + +@pytest.mark.unit +class TestUpgradeToolsDefault: + """Tests for the default LLMInterface.upgrade_tools no-op implementation.""" + + @pytest.fixture + def llm(self): + return ConcreteLLM() + + def test_returns_same_list(self, llm): + """Default upgrade_tools returns the input list unchanged.""" + tools = ["tool_a", "tool_b"] + result = llm.upgrade_tools(tools) + assert result is tools + + def test_empty_list(self, llm): + """Default upgrade_tools handles an empty list.""" + tools = [] + result = llm.upgrade_tools(tools) + assert result == [] + + def test_preserves_tool_order_and_identity(self, llm): + """Default upgrade_tools does not reorder or copy elements.""" + sentinel = object() + tools = [sentinel, "other"] + result = llm.upgrade_tools(tools) + assert result[0] is sentinel + assert result[1] == "other" \ No newline at end of file diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index ab27a68..81db12c 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -364,3 +364,58 @@ def test_rename_raises_runtime_error_on_failure(self, tmp_path): ) with pytest.raises(RuntimeError): tool.rename(cmd) + + +# --------------------------------------------------------------------------- +# invoke — non-memory tool calls are skipped +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInvoke: + + def test_invoke_skips_non_memory_tool_calls(self, tmp_path): + """The ``if tc["name"] != "memory": continue`` branch is exercised + when native_tool_calls contains a non-memory tool.""" + import json + from unittest.mock import Mock + + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello") + + command = json.dumps({ + "native_tool_calls": [ + {"name": "bash", "id": "tu_1", "input": {"command": "ls"}}, + {"name": "memory", "id": "tu_2", "input": { + "command": "view", "path": "/memories/f.md", "view_range": None, + }}, + ] + }) + + result = tool.invoke(command, parent_bot=Mock()) + + assert result.return_code == 0 + # Only the memory call should produce output; bash should be skipped + assert "hello" in result.stdout + + def test_invoke_catches_exception_from_tool_call(self, tmp_path): + """The ``except Exception`` branch is exercised when tool.call() raises.""" + import json + from unittest.mock import Mock, patch + + tool = make_tool(tmp_path) + + command = json.dumps({ + "native_tool_calls": [ + {"name": "memory", "id": "tu_1", "input": { + "command": "view", "path": "/memories/nonexistent.md", "view_range": None, + }}, + ] + }) + + # Force call() to raise an exception + with patch.object(tool, "call", side_effect=RuntimeError("boom")): + result = tool.invoke(command, parent_bot=Mock()) + + assert result.return_code == 0 + assert "Error executing tool 'memory'" in result.stdout + assert "boom" in result.stdout From 3067b59e77f40197fc0c70ae101205a2dd13ec61 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 07:39:00 +0000 Subject: [PATCH 10/10] enhance path validation --- src/microbots/tools/tool_definitions/memory_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index b63f3ff..8131b8b 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -101,9 +101,6 @@ def __post_init__(self): self._memory_dir = base self._memory_dir.mkdir(parents=True, exist_ok=True) - def is_model_supported(self, model_name: str) -> bool: - return True - # ---------------------------------------------------------------------- # # Path helpers # ---------------------------------------------------------------------- # @@ -116,6 +113,11 @@ def _resolve(self, path: str) -> Path: if ".." in Path(stripped).parts: raise ValueError(f"Path traversal not allowed: {path!r}") + if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"): + raise ValueError( + f"Invalid memory path: {path!r}. Use paths under /memories/." + ) + if stripped == "memories": rel = "" elif stripped.startswith("memories/"):