From 1c9b80920ce01333af12c6cd910fc7ab1bc91c89 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 3 Mar 2026 10:37:45 +0000 Subject: [PATCH 01/20] Implement generic Memory Tool and Anthropic-native memory tool --- src/microbots/MicroBot.py | 41 ++- src/microbots/llm/anthropic_api.py | 123 ++++++- .../tool_definitions/anthropic_memory_tool.py | 184 +++++++++++ .../tools/tool_definitions/memory_tool.py | 312 ++++++++++++++++++ 4 files changed, 647 insertions(+), 13 deletions(-) create mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py create mode 100644 src/microbots/tools/tool_definitions/memory_tool.py diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 3ace5a4..7b061d7 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -317,7 +317,38 @@ def _create_environment(self, folder_to_mount: Optional[Mount]): folder_to_mount=folder_to_mount, ) + def _upgrade_tools_for_provider(self): + """Auto-upgrade provider-agnostic tools to their provider-optimised variants. + + Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when + the provider is Anthropic so the model gets native structured tool-use + instead of the text-command loop. The ``memory_dir`` and any custom + ``usage_instructions_to_llm`` are forwarded to the upgraded instance. + """ + if self.model_provider != ModelProvider.ANTHROPIC: + return + + # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths + from microbots.tools.tool_definitions.memory_tool import MemoryTool + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + upgraded = [] + for tool in self.additional_tools: + if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): + logger.info( + "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider" + ) + upgraded.append(AnthropicMemoryTool( + memory_dir=tool.memory_dir, + usage_instructions=tool.usage_instructions_to_llm, + )) + else: + upgraded.append(tool) + self.additional_tools = upgraded + def _create_llm(self): + self._upgrade_tools_for_provider() + # Append tool usage instructions to system prompt system_prompt_with_tools = self.system_prompt if self.system_prompt else "" if self.additional_tools: @@ -334,8 +365,16 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, model_name=self.deployment_name ) elif self.model_provider == ModelProvider.ANTHROPIC: + # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing: + # any tool that exposes both to_dict() and call() is a native Anthropic tool. + native_tools = [ + t for t in self.additional_tools + if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None)) + ] self.llm = AnthropicApi( - system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name + system_prompt=system_prompt_with_tools, + deployment_name=self.deployment_name, + native_tools=native_tools or None, ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index f40118a..4a73ba5 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -1,7 +1,9 @@ import json import os +import re from dataclasses import asdict from logging import getLogger +from typing import List, Optional from dotenv import load_dotenv from anthropic import Anthropic @@ -16,9 +18,31 @@ api_key = os.getenv("ANTHROPIC_API_KEY") + class AnthropicApi(LLMInterface): - def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3): + def __init__( + self, + system_prompt: str, + deployment_name: str = deployment_name, + max_retries: int = 3, + native_tools: Optional[List] = None, + ): + """ + Parameters + ---------- + system_prompt : str + System prompt for the LLM. + deployment_name : str + The Anthropic model deployment name. + max_retries : int + Maximum number of retries for invalid LLM responses. + native_tools : Optional[List] + Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that + have both a ``to_dict()`` and a ``call()`` method. These are passed + directly to the API and their tool-use blocks are dispatched here + before the JSON response is returned to the caller. + """ self.ai_client = Anthropic( api_key=api_key, base_url=endpoint @@ -26,31 +50,106 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3 self.deployment_name = deployment_name self.system_prompt = system_prompt self.messages = [] + self.native_tools = native_tools or [] + # Cache tool dicts once so _call_api and _dispatch_tool_use don't + # re-serialise on every invocation (important when multiple native + # tools are registered, e.g. memory + bash). + self._native_tool_dicts = [t.to_dict() for t in self.native_tools] + self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)} # Set these values here. This logic will be handled in the parent class. self.max_retries = max_retries self.retries = 0 - def ask(self, message) -> LLMAskResponse: + # ---------------------------------------------------------------------- # + # Internal helpers + # ---------------------------------------------------------------------- # + + def _call_api(self) -> object: + """Call the Anthropic messages API, including native tools when present.""" + kwargs = dict( + model=self.deployment_name, + system=self.system_prompt, + messages=self.messages, + max_tokens=4096, + ) + + if self.native_tools: + kwargs["tools"] = self._native_tool_dicts + + return self.ai_client.messages.create(**kwargs) + + def _dispatch_tool_use(self, response) -> None: + """Handle a tool_use response: execute each tool call and append results. + + Mutates ``self.messages`` in place — appends the assistant turn (with + all content blocks) and the corresponding tool_result user turn. + """ + # Append the full assistant message as-is (content is a list of blocks) + assistant_content = [block.model_dump() for block in response.content] + self.messages.append({"role": "assistant", "content": assistant_content}) + + # Build tool_result entries for every tool_use block + tool_results = [] + for block in response.content: + if block.type != "tool_use": + continue + + # Find the matching native tool by name + tool = self._native_tools_by_name.get(block.name) + if tool is None: + result_text = f"Error: unknown tool '{block.name}'" + logger.error("Received tool_use for unknown tool: %s", block.name) + else: + try: + result_text = tool.call(block.input) + logger.info( + "🧠 Native tool '%s' executed. Result (first 200 chars): %s", + block.name, + str(result_text)[:200], + ) + except Exception as exc: + result_text = f"Error executing tool '{block.name}': {exc}" + logger.error("Native tool '%s' raised: %s", block.name, exc) + + tool_results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": str(result_text), + }) + + self.messages.append({"role": "user", "content": tool_results}) + + # ---------------------------------------------------------------------- # + # Public interface + # ---------------------------------------------------------------------- # + + def ask(self, message: str) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. self.messages.append({"role": "user", "content": message}) valid = False while not valid: - response = self.ai_client.messages.create( - model=self.deployment_name, - system=self.system_prompt, - messages=self.messages, - max_tokens=4096, - ) - - # Extract text content from response - response_text = response.content[0].text if response.content else "" + response = self._call_api() + + # Dispatch any tool_use rounds before looking for a JSON response. + # The model may call the memory tool multiple times before producing + # its final JSON command. + while response.stop_reason == "tool_use": + self._dispatch_tool_use(response) + response = self._call_api() + + # Extract text content from the final response + response_text = "" + for block in response.content: + if block.type == "text": + response_text = block.text + break + logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500]) # Try to extract JSON if wrapped in markdown code blocks - import re json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL) if json_match: response_text = json_match.group(1) diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py new file mode 100644 index 0000000..e80750c --- /dev/null +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -0,0 +1,184 @@ +""" +AnthropicMemoryTool — wraps Anthropic's memory tool. + +The memory tool lets the model persist information across conversations by +reading and writing files in a local memory directory. When the model invokes +the tool, it sends a command (view, create, str_replace, insert, delete, +rename) and the client executes it against a local filesystem directory. + +This implementation extends both: + - ``MemoryTool``: provides all file-operation logic (_resolve, _view, + _create, _str_replace, _insert, _delete, _rename, _clear) and the + ToolAbstract duck-typing interface. + - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and + the ``to_dict()`` / ``call()`` interface required by AnthropicApi. + +The SDK command-handler overrides (view, create, str_replace, insert, delete, +rename) simply translate SDK command objects → arg lists and delegate to the +inherited MemoryTool private methods, converting the CmdReturn back to a +string as the SDK expects. + +The memory tool (type ``memory_20250818``) is available in the standard +Anthropic library and does not require a beta endpoint or header. Pass it +via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular +``client.messages.create(...)`` call. ``AnthropicApi`` handles this +automatically when ``native_tools`` contains an ``AnthropicMemoryTool``. + +Usage: + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + memory = AnthropicMemoryTool() + bot = ReadingBot(..., additional_tools=[memory]) +""" + +from __future__ import annotations + +from logging import getLogger +from pathlib import Path + +from typing_extensions import override + +from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool +from anthropic.types.beta import ( + BetaMemoryTool20250818CreateCommand, + BetaMemoryTool20250818DeleteCommand, + BetaMemoryTool20250818InsertCommand, + BetaMemoryTool20250818RenameCommand, + BetaMemoryTool20250818StrReplaceCommand, + BetaMemoryTool20250818ViewCommand, +) + +from microbots.tools.tool_definitions.memory_tool import MemoryTool + +logger = getLogger(__name__) + +DEFAULT_MEMORY_INSTRUCTIONS = ( + "MEMORY PROTOCOL:\n" + "1. ALWAYS view your memory directory BEFORE doing anything else " + "using the `view` command of your `memory` tool to check for earlier progress.\n" + "2. As you make progress on the task, record status, progress, " + "and key findings in your memory using the memory tool.\n" + "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, " + "so you risk losing any progress that is not recorded in your memory directory.\n" + "4. Before completing a task, always save your final results and analysis to memory.\n" + "5. When editing your memory folder, always keep its content up-to-date, coherent " + "and organized. Rename or delete files that are no longer relevant. " + "Do not create new files unless necessary.\n\n" + "IMPORTANT: The memory tool ONLY works with paths under /memories/. " + "Do NOT use the memory tool to access the repository or workdir. " + "Use shell commands (ls, cat, etc.) for filesystem access." +) + + +class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool): + """ + Anthropic's built-in memory tool, backed by MemoryTool's file logic. + + Inherits file-operation logic from ``MemoryTool`` (plain Python class) and + the SDK's native dispatch interface from ``BetaAbstractMemoryTool``. + + The SDK command-handler overrides delegate to the inherited private methods + (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to + the ``args: list`` format that those methods expect, and converting the + returned ``CmdReturn`` to the string that the SDK API requires. + + Parameters + ---------- + memory_dir : str | Path | None + Root directory for memory files. Defaults to ``~/.microbots/memory``. + usage_instructions : str | None + Custom instructions appended to the system prompt for the LLM. + Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``. + """ + + def __init__( + self, + memory_dir: str | Path | None = None, + usage_instructions: str | None = None, + ) -> None: + MemoryTool.__init__( + self, + memory_dir=str(memory_dir) if memory_dir else None, + usage_instructions_to_llm=( + usage_instructions + if usage_instructions is not None + else DEFAULT_MEMORY_INSTRUCTIONS + ), + ) + _SDKMemoryTool.__init__(self) # type: ignore[call-arg] + + # ---------------------------------------------------------------------- # + # ToolAbstract duck-typing overrides + # ---------------------------------------------------------------------- # + + def is_model_supported(self, model_name: str) -> bool: + """Only Anthropic (Claude) models support the native memory tool.""" + return "claude" in model_name.lower() + + def is_invoked(self, command: str) -> bool: + """Return False — this tool is dispatched natively by AnthropicApi, + not via the shell command loop.""" + return False + + def clear_all(self) -> None: + """Delete all memory files (useful for testing or resetting state).""" + self._clear() + logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir) + + # ---------------------------------------------------------------------- # + # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods + # ---------------------------------------------------------------------- # + + @override + def clear_all_memory(self) -> str: + self.clear_all() + return "All memory cleared" + + @override + def view(self, command: BetaMemoryTool20250818ViewCommand) -> str: + args = [command.path] + if command.view_range: + args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])] + result = self._view(args) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def create(self, command: BetaMemoryTool20250818CreateCommand) -> str: + result = self._create([command.path, command.file_text]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File created successfully at {command.path}" + + @override + def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str: + result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File {command.path} has been edited" + + @override + def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str: + result = self._insert([ + command.path, + "--line", str(command.insert_line), + "--text", command.insert_text, + ]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"Text inserted at line {command.insert_line} in {command.path}" + + @override + def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str: + result = self._delete([command.path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str: + result = self._rename([command.old_path, command.new_path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py new file mode 100644 index 0000000..912fe6d --- /dev/null +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -0,0 +1,312 @@ +import logging +import shlex +import shutil +from pathlib import Path +from typing import Optional + +from pydantic.dataclasses import dataclass, Field + +from microbots.environment.Environment import CmdReturn +from microbots.tools.external_tool import ExternalTool + +logger = logging.getLogger(" 🧠 MemoryTool") + +INSTRUCTIONS_TO_LLM = """ +Use this tool to persist information to files across steps — same interface as +the Anthropic memory tool. All paths must be under /memories/. + +MEMORY PROTOCOL: +1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for + earlier progress. +2. Record status, findings and intermediate results as you go. +3. Before completing a task, save your final results to memory. +4. Keep the memory folder organised — rename or delete stale files. + +## Commands + +View a file or list a directory: + memory view + memory view --start --end + +Create a file: + memory create + +Replace a unique string in a file: + memory str_replace --old "" --new "" + +Insert a line into a file (0 = prepend): + memory insert --line --text "" + +Delete a file or directory: + memory delete + +Rename / move a file: + memory rename + +Clear all memory: + memory clear + +## Examples + + memory view /memories + memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42" + memory str_replace /memories/progress.md --old "line 42" --new "line 45" + memory insert /memories/progress.md --line 0 --text "# Task Notes" + memory view /memories/progress.md --start 1 --end 10 + memory delete /memories/old_notes.md + memory rename /memories/draft.md /memories/final.md + +## Notes +- Paths must start with /memories/. +- memory create overwrites if the file already exists. +- memory str_replace requires the old text to appear exactly once. +""" + + +@dataclass +class MemoryTool(ExternalTool): + """ + File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface + but dispatches through the text command loop (compatible with all providers). + + Subclass of ``ExternalTool`` — all command lists are empty so + ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and + ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``. + + All files are stored under ``memory_dir`` on the host (default + ``~/.microbots/memory``). The LLM uses paths like ``/memories/notes.md`` + which are resolved relative to ``memory_dir``. + + Supported subcommands + --------------------- + memory view [--start N] [--end N] + memory create + memory str_replace --old --new + memory insert --line N --text + memory delete + memory rename + memory clear + """ + + name: str = Field(default="memory") + description: str = Field( + default="File-backed memory store — view, create, edit, delete files under /memories/." + ) + usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM) + memory_dir: Optional[str] = Field(default=None) + + def __post_init__(self): + base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory" + self._memory_dir = base + self._memory_dir.mkdir(parents=True, exist_ok=True) + + def is_model_supported(self, model_name: str) -> bool: + return True + + # ---------------------------------------------------------------------- # + # Path helpers + # ---------------------------------------------------------------------- # + + def _resolve(self, path: str) -> Path: + """Resolve a /memories/… path to an absolute host path.""" + stripped = path.lstrip("/") + if stripped == "memories": + rel = "" + elif stripped.startswith("memories/"): + rel = stripped[len("memories/"):] + elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")): + raise ValueError( + f"Invalid memory path: {path!r}. Use paths under /memories/." + ) + else: + rel = stripped # treat as relative to memory_dir + + resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() + if not str(resolved).startswith(str(self._memory_dir.resolve())): + raise ValueError(f"Path traversal not allowed: {path!r}") + return resolved + + # ---------------------------------------------------------------------- # + # ToolAbstract interface + # ---------------------------------------------------------------------- # + + def is_invoked(self, command: str) -> bool: + return command.strip().startswith("memory ") + + def invoke(self, command: str, parent_bot) -> CmdReturn: + try: + tokens = shlex.split(command) + except ValueError as exc: + return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1) + + if len(tokens) < 2: + return CmdReturn(stdout="", stderr="Usage: memory ...", return_code=1) + + sub = tokens[1] + args = tokens[2:] + + try: + if sub == "view": + return self._view(args) + elif sub == "create": + return self._create(args) + elif sub == "str_replace": + return self._str_replace(args) + elif sub == "insert": + return self._insert(args) + elif sub == "delete": + return self._delete(args) + elif sub == "rename": + return self._rename(args) + elif sub == "clear": + return self._clear() + else: + return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1) + except (ValueError, FileNotFoundError, RuntimeError) as exc: + logger.error("🧠 MemoryTool error: %s", exc) + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + # ---------------------------------------------------------------------- # + # Subcommand handlers + # ---------------------------------------------------------------------- # + + def _view(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory view [--start N] [--end N]", return_code=1) + + path = args[0] + start_line = None + end_line = None + i = 1 + while i < len(args): + if args[i] == "--start" and i + 1 < len(args): + start_line = int(args[i + 1]); i += 2 + elif args[i] == "--end" and i + 1 < len(args): + end_line = int(args[i + 1]); i += 2 + else: + i += 1 + + resolved = self._resolve(path) + if not resolved.exists(): + raise RuntimeError(f"Path not found: {path!r}") + + if resolved.is_dir(): + items = [ + (f"{item.name}/" if item.is_dir() else item.name) + for item in sorted(resolved.iterdir()) + if not item.name.startswith(".") + ] + result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items) + return CmdReturn(stdout=result, stderr="", return_code=0) + + lines = resolved.read_text(encoding="utf-8").splitlines() + if start_line is not None or end_line is not None: + s = max(0, (start_line or 1) - 1) + e = len(lines) if (end_line is None or end_line == -1) else end_line + lines = lines[s:e] + base_num = s + 1 + else: + base_num = 1 + numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines)) + return CmdReturn(stdout=numbered, stderr="", return_code=0) + + def _create(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory create ", return_code=1) + path, content = args[0], args[1] + resolved = self._resolve(path) + resolved.parent.mkdir(parents=True, exist_ok=True) + resolved.write_text(content, encoding="utf-8") + logger.info("🧠 Memory file created: %s", path) + return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0) + + def _str_replace(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory str_replace --old --new ", return_code=1) + path = args[0] + old_text = new_text = None + i = 1 + while i < len(args): + if args[i] == "--old" and i + 1 < len(args): + old_text = args[i + 1]; i += 2 + elif args[i] == "--new" and i + 1 < len(args): + new_text = args[i + 1]; i += 2 + else: + i += 1 + if old_text is None or new_text is None: + return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + raise FileNotFoundError(f"File not found: {path!r}") + content = resolved.read_text(encoding="utf-8") + count = content.count(old_text) + if count == 0: + raise ValueError(f"Text not found in {path!r}") + if count > 1: + raise ValueError(f"Text appears {count} times in {path!r} — must be unique") + resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") + return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) + + def _insert(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory insert --line N --text ", return_code=1) + path = args[0] + line_num = text = None + i = 1 + while i < len(args): + if args[i] == "--line" and i + 1 < len(args): + line_num = int(args[i + 1]); i += 2 + elif args[i] == "--text" and i + 1 < len(args): + text = args[i + 1]; i += 2 + else: + i += 1 + if line_num is None or text is None: + return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + raise FileNotFoundError(f"File not found: {path!r}") + lines = resolved.read_text(encoding="utf-8").splitlines() + if line_num < 0 or line_num > len(lines): + raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.") + lines.insert(line_num, text.rstrip("\n")) + resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") + return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) + + def _delete(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) + path = args[0] + if path.rstrip("/") in ("/memories", "memories", ""): + raise ValueError("Cannot delete the /memories root directory") + resolved = self._resolve(path) + if resolved.is_file(): + resolved.unlink() + logger.info("🧠 Memory file deleted: %s", path) + return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0) + if resolved.is_dir(): + shutil.rmtree(resolved) + logger.info("🧠 Memory directory deleted: %s", path) + return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) + raise FileNotFoundError(f"Path not found: {path!r}") + + def _rename(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory rename ", return_code=1) + old_path, new_path = args[0], args[1] + old_resolved = self._resolve(old_path) + new_resolved = self._resolve(new_path) + if not old_resolved.exists(): + raise FileNotFoundError(f"Source not found: {old_path!r}") + if new_resolved.exists(): + raise ValueError(f"Destination already exists: {new_path!r}") + new_resolved.parent.mkdir(parents=True, exist_ok=True) + old_resolved.rename(new_resolved) + logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) + return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0) + + def _clear(self) -> CmdReturn: + if self._memory_dir.exists(): + shutil.rmtree(self._memory_dir) + self._memory_dir.mkdir(parents=True, exist_ok=True) + logger.info("🧠 Memory cleared.") + return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0) From 315cc03ad359947624f5ebdbc09efed0af094ae4 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 3 Mar 2026 10:51:30 +0000 Subject: [PATCH 02/20] Refactor MemoryTool error handling to return CmdReturn instead of raising exceptions and add tests --- .../tools/tool_definitions/memory_tool.py | 20 +- test/llm/test_anthropic_api.py | 367 ++++++++++++++ .../tool_definitions/test_memory_tool.py | 449 ++++++++++++++++++ 3 files changed, 826 insertions(+), 10 deletions(-) create mode 100644 test/tools/tool_definitions/test_memory_tool.py diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 912fe6d..310f880 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -188,7 +188,7 @@ def _view(self, args: list) -> CmdReturn: resolved = self._resolve(path) if not resolved.exists(): - raise RuntimeError(f"Path not found: {path!r}") + return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) if resolved.is_dir(): items = [ @@ -237,13 +237,13 @@ def _str_replace(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) resolved = self._resolve(path) if not resolved.is_file(): - raise FileNotFoundError(f"File not found: {path!r}") + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) content = resolved.read_text(encoding="utf-8") count = content.count(old_text) if count == 0: - raise ValueError(f"Text not found in {path!r}") + return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1) if count > 1: - raise ValueError(f"Text appears {count} times in {path!r} — must be unique") + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1) resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) @@ -264,10 +264,10 @@ def _insert(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) resolved = self._resolve(path) if not resolved.is_file(): - raise FileNotFoundError(f"File not found: {path!r}") + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) lines = resolved.read_text(encoding="utf-8").splitlines() if line_num < 0 or line_num > len(lines): - raise ValueError(f"Invalid line number {line_num}. Must be 0–{len(lines)}.") + return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1) lines.insert(line_num, text.rstrip("\n")) resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) @@ -277,7 +277,7 @@ def _delete(self, args: list) -> CmdReturn: return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) path = args[0] if path.rstrip("/") in ("/memories", "memories", ""): - raise ValueError("Cannot delete the /memories root directory") + return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) resolved = self._resolve(path) if resolved.is_file(): resolved.unlink() @@ -287,7 +287,7 @@ def _delete(self, args: list) -> CmdReturn: shutil.rmtree(resolved) logger.info("🧠 Memory directory deleted: %s", path) return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) - raise FileNotFoundError(f"Path not found: {path!r}") + return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) def _rename(self, args: list) -> CmdReturn: if len(args) < 2: @@ -296,9 +296,9 @@ def _rename(self, args: list) -> CmdReturn: old_resolved = self._resolve(old_path) new_resolved = self._resolve(new_path) if not old_resolved.exists(): - raise FileNotFoundError(f"Source not found: {old_path!r}") + return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1) if new_resolved.exists(): - raise ValueError(f"Destination already exists: {new_path!r}") + return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1) new_resolved.parent.mkdir(parents=True, exist_ok=True) old_resolved.rename(new_resolved) logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 674294c..49674aa 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -98,7 +98,9 @@ def test_ask_successful_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo 'hello'", @@ -133,7 +135,9 @@ def test_ask_with_task_done_true(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": True, "command": "", @@ -157,12 +161,16 @@ def test_ask_with_retry_on_invalid_response(self): # Mock the Anthropic client to return invalid then valid response mock_invalid_response = Mock() + mock_invalid_response.stop_reason = "end_turn" mock_invalid_content = Mock() + mock_invalid_content.type = "text" mock_invalid_content.text = "invalid json" mock_invalid_response.content = [mock_invalid_content] mock_valid_response = Mock() + mock_valid_response.stop_reason = "end_turn" mock_valid_content = Mock() + mock_valid_content.type = "text" mock_valid_content.text = json.dumps({ "task_done": False, "command": "ls -la", @@ -193,7 +201,9 @@ def test_ask_appends_user_message(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -218,7 +228,9 @@ def test_ask_appends_assistant_response_as_json(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo test", @@ -247,7 +259,9 @@ def test_ask_uses_asdict_for_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" response_dict = { "task_done": True, "command": "", @@ -277,7 +291,9 @@ def test_ask_resets_retries_to_zero(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "ls", @@ -299,7 +315,9 @@ def test_ask_extracts_json_from_markdown(self): # Mock response with markdown-wrapped JSON mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = """Here's the response: ```json { @@ -420,7 +438,9 @@ def test_ask_with_empty_message(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo ''", @@ -443,7 +463,9 @@ def test_multiple_ask_calls_append_messages(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -513,6 +535,351 @@ def test_anthropic_api_clear_history_integration(self): assert len(api.messages) == 0 # Anthropic doesn't store system in messages +# ============================================================================ +# Tests for native_tools support (new changes) +# ============================================================================ + +@pytest.mark.unit +class TestAnthropicApiNativeToolsInit: + """Tests for __init__ native_tools caching.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_init_without_native_tools_has_empty_caches(self): + api = AnthropicApi(system_prompt="test") + + assert api.native_tools == [] + assert api._native_tool_dicts == [] + assert api._native_tools_by_name == {} + + def test_init_with_none_native_tools_has_empty_caches(self): + api = AnthropicApi(system_prompt="test", native_tools=None) + + assert api._native_tool_dicts == [] + assert api._native_tools_by_name == {} + + def test_init_with_single_native_tool_caches_dict(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}] + + def test_init_with_single_native_tool_caches_by_name(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert "memory" in api._native_tools_by_name + assert api._native_tools_by_name["memory"] is tool + + def test_init_with_multiple_native_tools_caches_all(self): + tool1 = Mock() + tool1.to_dict.return_value = {"name": "memory"} + tool2 = Mock() + tool2.to_dict.return_value = {"name": "bash"} + + api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + + assert len(api._native_tool_dicts) == 2 + assert api._native_tools_by_name["memory"] is tool1 + assert api._native_tools_by_name["bash"] is tool2 + + def test_init_calls_to_dict_exactly_once_per_tool(self): + """to_dict() must not be called again on subsequent API calls.""" + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + + AnthropicApi(system_prompt="test", native_tools=[tool]) + + assert tool.to_dict.call_count == 1 + + +@pytest.mark.unit +class TestAnthropicApiCallApiWithTools: + """Tests for _call_api including/excluding the tools kwarg.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_call_api_without_tools_omits_tools_kwarg(self): + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" not in call_kwargs + + def test_call_api_with_tools_passes_cached_dicts(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" in call_kwargs + assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}] + + def test_call_api_does_not_call_to_dict_again(self): + """to_dict() should only be called during __init__, never during _call_api.""" + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + count_after_init = tool.to_dict.call_count # should be 1 + api._call_api() + api._call_api() + + assert tool.to_dict.call_count == count_after_init # no increase + + +@pytest.mark.unit +class TestAnthropicApiDispatchToolUse: + """Tests for _dispatch_tool_use.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + # ------------------------------------------------------------------ # + # Helpers + # ------------------------------------------------------------------ # + + @staticmethod + def _tool_use_block(name, tool_id="tu_001", input_data=None): + block = Mock() + block.type = "tool_use" + block.name = name + block.id = tool_id + block.input = input_data or {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name} + return block + + @staticmethod + def _text_block(text="hello"): + block = Mock() + block.type = "text" + block.text = text + block.model_dump.return_value = {"type": "text", "text": text} + return block + + # ------------------------------------------------------------------ # + # Tests + # ------------------------------------------------------------------ # + + def test_dispatch_appends_assistant_message_first(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory")] + api._dispatch_tool_use(response) + + assert api.messages[0]["role"] == "assistant" + + def test_dispatch_appends_tool_result_user_message(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "file listing" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory", tool_id="tu_abc")] + api._dispatch_tool_use(response) + + user_msg = api.messages[1] + assert user_msg["role"] == "user" + assert user_msg["content"][0]["type"] == "tool_result" + assert user_msg["content"][0]["tool_use_id"] == "tu_abc" + assert user_msg["content"][0]["content"] == "file listing" + + def test_dispatch_calls_tool_with_correct_input(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + input_data = {"command": "view", "path": "/memories"} + response = Mock() + response.content = [self._tool_use_block("memory", input_data=input_data)] + api._dispatch_tool_use(response) + + tool.call.assert_called_once_with(input_data) + + def test_dispatch_unknown_tool_returns_error_in_result(self): + api = AnthropicApi(system_prompt="test") # no native tools + + response = Mock() + response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")] + api._dispatch_tool_use(response) + + content = api.messages[1]["content"][0]["content"] + assert "Error" in content + assert "unknown_tool" in content + + def test_dispatch_tool_exception_returns_error_message(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.side_effect = RuntimeError("disk full") + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [self._tool_use_block("memory", tool_id="tu_exc")] + api._dispatch_tool_use(response) + + content = api.messages[1]["content"][0]["content"] + assert "Error" in content + assert "disk full" in content + + def test_dispatch_skips_non_tool_use_content_blocks(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "result" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + response = Mock() + response.content = [ + self._text_block("thinking..."), + self._tool_use_block("memory", tool_id="tu_only"), + ] + api._dispatch_tool_use(response) + + tool_results = api.messages[1]["content"] + assert len(tool_results) == 1 + assert tool_results[0]["tool_use_id"] == "tu_only" + + def test_dispatch_handles_multiple_tool_use_blocks(self): + tool1 = Mock() + tool1.to_dict.return_value = {"name": "memory"} + tool1.call.return_value = "memory result" + tool2 = Mock() + tool2.to_dict.return_value = {"name": "bash"} + tool2.call.return_value = "bash result" + api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + + response = Mock() + response.content = [ + self._tool_use_block("memory", tool_id="id_1"), + self._tool_use_block("bash", tool_id="id_2"), + ] + api._dispatch_tool_use(response) + + results = api.messages[1]["content"] + assert len(results) == 2 + assert results[0]["tool_use_id"] == "id_1" + assert results[0]["content"] == "memory result" + assert results[1]["tool_use_id"] == "id_2" + assert results[1]["content"] == "bash result" + + +@pytest.mark.unit +class TestAnthropicApiAskWithToolUseLoop: + """Tests for ask() cycling through tool_use rounds before returning JSON.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + @staticmethod + def _tool_use_response(tool_name, tool_id): + block = Mock() + block.type = "tool_use" + block.name = tool_name + block.id = tool_id + block.input = {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} + response = Mock() + response.stop_reason = "tool_use" + response.content = [block] + return response + + @staticmethod + def _text_response(json_dict): + block = Mock() + block.type = "text" + block.text = json.dumps(json_dict) + block.model_dump.return_value = {"type": "text", "text": block.text} + response = Mock() + response.stop_reason = "end_turn" + response.content = [block] + return response + + def test_ask_dispatches_one_tool_use_round_then_returns(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "viewed /memories" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + result = api.ask("do the task") + + assert api.ai_client.messages.create.call_count == 2 + tool.call.assert_called_once() + assert result.command == "ls /" + + def test_ask_dispatches_multiple_tool_use_rounds(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "ok" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp1 = self._tool_use_response("memory", "tu_1") + tool_resp2 = self._tool_use_response("memory", "tu_2") + final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp]) + + result = api.ask("do the task") + + assert api.ai_client.messages.create.call_count == 3 + assert tool.call.call_count == 2 + assert result.task_done is True + + def test_ask_without_tool_use_does_not_dispatch(self): + api = AnthropicApi(system_prompt="test") + + final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) + api.ai_client.messages.create = Mock(return_value=final_resp) + + result = api.ask("where am I?") + + assert api.ai_client.messages.create.call_count == 1 + assert result.command == "pwd" + + def test_ask_tool_use_messages_are_added_to_history(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory"} + tool.call.return_value = "result" + api = AnthropicApi(system_prompt="test", native_tools=[tool]) + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + api.ask("do it") + + # Messages: user, assistant(tool_use), user(tool_result), assistant(final json) + roles = [m["role"] for m in api.messages] + assert roles.count("user") == 2 + assert roles.count("assistant") == 2 + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py new file mode 100644 index 0000000..2c317f6 --- /dev/null +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -0,0 +1,449 @@ +""" +Unit tests for MemoryTool — file-backed memory store. + +All tests use pytest's tmp_path fixture so they are isolated from the +user's real ~/.microbots/memory directory. +""" +import sys +import os +import pytest +from pathlib import Path +from unittest.mock import Mock + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src"))) + +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.environment.Environment import CmdReturn + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_tool(tmp_path: Path) -> MemoryTool: + """Return a MemoryTool whose memory_dir lives under tmp_path.""" + return MemoryTool(memory_dir=str(tmp_path / "memory")) + + +# --------------------------------------------------------------------------- +# Initialisation +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInit: + + def test_memory_dir_is_created_on_init(self, tmp_path): + mem_dir = tmp_path / "memory" + assert not mem_dir.exists() + + make_tool(tmp_path) + + assert mem_dir.exists() + assert mem_dir.is_dir() + + def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): + """When no memory_dir is given it falls back to ~/.microbots/memory.""" + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + tool = MemoryTool() + assert tool._memory_dir == tmp_path / ".microbots" / "memory" + + +# --------------------------------------------------------------------------- +# is_invoked +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsInvoked: + + def test_returns_true_for_memory_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("memory view /memories") is True + assert tool.is_invoked("memory create /memories/f.md hello") is True + + def test_returns_false_for_other_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("ls -la") is False + assert tool.is_invoked("cat file.txt") is False + assert tool.is_invoked("") is False + + def test_strips_leading_whitespace(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked(" memory view /memories") is True + + +# --------------------------------------------------------------------------- +# Path resolution (_resolve) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolResolve: + + def test_resolve_memories_root(self, tmp_path): + tool = make_tool(tmp_path) + assert tool._resolve("/memories") == tool._memory_dir.resolve() + + def test_resolve_memories_subpath(self, tmp_path): + tool = make_tool(tmp_path) + resolved = tool._resolve("/memories/notes.md") + assert resolved == (tool._memory_dir / "notes.md").resolve() + + def test_resolve_rejects_path_traversal(self, tmp_path): + tool = make_tool(tmp_path) + with pytest.raises(ValueError, match="Path traversal"): + tool._resolve("/memories/../../etc/passwd") + + def test_resolve_rejects_non_memory_paths(self, tmp_path): + tool = make_tool(tmp_path) + for bad in ("/workdir/file", "/home/user/file", "/tmp/file"): + with pytest.raises(ValueError): + tool._resolve(bad) + + +# --------------------------------------------------------------------------- +# _view +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolView: + + def test_view_directory_lists_contents(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "notes.md").write_text("hello") + (tool._memory_dir / "sub").mkdir() + + result = tool._view(["/memories"]) + + assert result.return_code == 0 + assert "notes.md" in result.stdout + assert "sub/" in result.stdout + + def test_view_file_returns_numbered_lines(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n") + + result = tool._view(["/memories/f.md"]) + + assert result.return_code == 0 + assert "1:" in result.stdout + assert "line1" in result.stdout + assert "3:" in result.stdout + + def test_view_file_with_line_range(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") + + result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"]) + + assert result.return_code == 0 + assert "b" in result.stdout + assert "d" in result.stdout + assert "a" not in result.stdout + assert "e" not in result.stdout + + def test_view_nonexistent_path_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._view(["/memories/nonexistent.md"]) + + assert result.return_code != 0 + + def test_view_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._view([]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _create +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolCreate: + + def test_create_writes_file(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(["/memories/notes.md", "hello world"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "notes.md").read_text() == "hello world" + + def test_create_overwrites_existing_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("old content") + + result = tool._create(["/memories/f.md", "new content"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "new content" + + def test_create_creates_parent_directories(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(["/memories/sub/dir/f.md", "content"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "sub" / "dir" / "f.md").exists() + + def test_create_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._create(["/memories/f.md"]) # missing content + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _str_replace +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolStrReplace: + + def test_str_replace_replaces_unique_text(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"]) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_str_replace_fails_when_text_not_found(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"]) + + assert result.return_code != 0 + assert "not found" in result.stderr.lower() + + def test_str_replace_fails_when_text_not_unique(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello hello") + + result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"]) + + assert result.return_code != 0 + assert "2" in result.stderr # appears N times + + def test_str_replace_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("text") + result = tool._str_replace(["/memories/f.md"]) + assert result.return_code != 0 + + def test_str_replace_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _insert +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInsert: + + def test_insert_prepends_at_line_zero(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"]) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + assert lines[1] == "line1" + + def test_insert_at_end_of_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"]) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[-1] == "appended" + + def test_insert_invalid_line_number_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + + result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"]) + + assert result.return_code != 0 + + def test_insert_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"]) + assert result.return_code != 0 + + def test_insert_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + result = tool._insert(["/memories/f.md"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _delete +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolDelete: + + def test_delete_removes_file(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool._delete(["/memories/f.md"]) + + assert result.return_code == 0 + assert not f.exists() + + def test_delete_removes_directory(self, tmp_path): + tool = make_tool(tmp_path) + sub = tool._memory_dir / "sub" + sub.mkdir() + (sub / "f.md").write_text("data") + + result = tool._delete(["/memories/sub"]) + + assert result.return_code == 0 + assert not sub.exists() + + def test_delete_prevents_root_deletion(self, tmp_path): + tool = make_tool(tmp_path) + for path in ("/memories", "memories", "/memories/"): + result = tool._delete([path]) + assert result.return_code != 0 + + def test_delete_nonexistent_path_raises(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._delete(["/memories/nonexistent.md"]) + assert result.return_code != 0 + + def test_delete_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._delete([]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _rename +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolRename: + + def test_rename_moves_file(self, tmp_path): + tool = make_tool(tmp_path) + src = tool._memory_dir / "old.md" + src.write_text("content") + + result = tool._rename(["/memories/old.md", "/memories/new.md"]) + + assert result.return_code == 0 + assert not src.exists() + assert (tool._memory_dir / "new.md").read_text() == "content" + + def test_rename_nonexistent_source_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._rename(["/memories/missing.md", "/memories/new.md"]) + assert result.return_code != 0 + + def test_rename_fails_if_destination_exists(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._rename(["/memories/a.md", "/memories/b.md"]) + + assert result.return_code != 0 + + def test_rename_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._rename(["/memories/a.md"]) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _clear +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolClear: + + def test_clear_removes_all_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._clear() + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_clear_leaves_memory_dir_intact(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + tool._clear() + + assert tool._memory_dir.exists() + + +# --------------------------------------------------------------------------- +# invoke — full command dispatch +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInvoke: + + def test_invoke_view_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello") + + result = tool.invoke("memory view /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert "hello" in result.stdout + + def test_invoke_create_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool.invoke('memory create /memories/n.md "some content"', parent_bot=Mock()) + + assert result.return_code == 0 + assert (tool._memory_dir / "n.md").read_text() == "some content" + + def test_invoke_clear_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + result = tool.invoke("memory clear", parent_bot=Mock()) + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_invoke_unknown_subcommand_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock()) + assert result.return_code != 0 + assert "Unknown subcommand" in result.stderr + + def test_invoke_too_few_tokens_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory", parent_bot=Mock()) + assert result.return_code != 0 + + def test_invoke_handles_bad_quoting_gracefully(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke('memory create /memories/f.md "unclosed', parent_bot=Mock()) + assert result.return_code != 0 + assert "Parse error" in result.stderr + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 0260226a00118678c240951d5e66611b4cb8e0a1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 07:48:13 +0000 Subject: [PATCH 03/20] Initial plan From ae8e01ef1941a529f3f4a6cc496ed2cf284e1869 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Mar 2026 07:49:29 +0000 Subject: [PATCH 04/20] Add workflow_dispatch trigger to dockerBuildPush workflow Co-authored-by: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com> --- .github/workflows/dockerBuildPush.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml index 08341d0..ae10709 100644 --- a/.github/workflows/dockerBuildPush.yml +++ b/.github/workflows/dockerBuildPush.yml @@ -1,6 +1,7 @@ name: Build and Push to Docker Hub on: + workflow_dispatch: push: paths: - 'src/microbots/environment/local_docker/image_builder/Dockerfile' From abba9026ae427110e0479f5f329f30cd7b8eea5a Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 08:22:40 +0000 Subject: [PATCH 05/20] Modify comments in AnthropicMemoryTool implementation --- .../tools/tool_definitions/anthropic_memory_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py index e80750c..9cf547e 100644 --- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -8,8 +8,8 @@ This implementation extends both: - ``MemoryTool``: provides all file-operation logic (_resolve, _view, - _create, _str_replace, _insert, _delete, _rename, _clear) and the - ToolAbstract duck-typing interface. + _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies + the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.). - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and the ``to_dict()`` / ``call()`` interface required by AnthropicApi. @@ -108,7 +108,7 @@ def __init__( _SDKMemoryTool.__init__(self) # type: ignore[call-arg] # ---------------------------------------------------------------------- # - # ToolAbstract duck-typing overrides + # ToolAbstract overrides # ---------------------------------------------------------------------- # def is_model_supported(self, model_name: str) -> bool: From 213aa9e70bd249df48a74e1e956930ea14a76656 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 09:01:00 +0000 Subject: [PATCH 06/20] Add unit tests for MemoryTool and AnthropicMemoryTool functionality --- test/bot/test_upgrade_tools_for_provider.py | 161 ++++++++ .../test_anthropic_memory_tool.py | 347 ++++++++++++++++++ .../tool_definitions/test_memory_tool.py | 104 ++++++ 3 files changed, 612 insertions(+) create mode 100644 test/bot/test_upgrade_tools_for_provider.py create mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py new file mode 100644 index 0000000..f5d1e98 --- /dev/null +++ b/test/bot/test_upgrade_tools_for_provider.py @@ -0,0 +1,161 @@ +""" +Unit tests for MicroBot._upgrade_tools_for_provider. + +These tests verify that plain ``MemoryTool`` instances are automatically +replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic, +and that no changes are made for other providers or other tool types. + +All tests bypass the heavy MicroBot constructor (Docker environment, LLM +creation) by constructing an uninitialized instance with ``object.__new__`` +and manually setting only the attributes the method under test needs. +""" +import sys +import os +import logging +import pytest +from unittest.mock import patch, Mock + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) + +from microbots.MicroBot import MicroBot +from microbots.constants import ModelProvider +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _bare_microbot(model_provider: str, tools: list) -> MicroBot: + """Return an uninitialized MicroBot with only the attributes that + ``_upgrade_tools_for_provider`` inspects.""" + bot = object.__new__(MicroBot) + bot.model_provider = model_provider + bot.additional_tools = list(tools) + return bot + + +def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: + return MemoryTool( + memory_dir=str(tmp_path / "memory"), + usage_instructions_to_llm=instructions, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestUpgradeToolsForProvider: + + # -- Anthropic provider: MemoryTool → AnthropicMemoryTool --------------- + + def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + + def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): + mem_dir = str(tmp_path / "my_memory") + tool = MemoryTool(memory_dir=mem_dir) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + upgraded = bot.additional_tools[0] + assert isinstance(upgraded, AnthropicMemoryTool) + assert str(upgraded.memory_dir) == mem_dir + + def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): + custom_instructions = "custom memory instructions for test" + tool = _memory_tool(tmp_path, instructions=custom_instructions) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) + + bot._upgrade_tools_for_provider() + + upgraded = bot.additional_tools[0] + assert upgraded.usage_instructions_to_llm == custom_instructions + + def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): + existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert bot.additional_tools[0] is existing + + def test_non_memory_tools_are_kept_unchanged(self, tmp_path): + other_tool = Mock() + other_tool.__class__ = Mock # not a MemoryTool subclass + bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert bot.additional_tools[0] is other_tool + + def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): + plain_memory = _memory_tool(tmp_path) + already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) + other_tool = Mock(spec=[]) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 3 + # first: should have been upgraded + assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + assert bot.additional_tools[0] is not plain_memory + # second: already AnthropicMemoryTool, untouched + assert bot.additional_tools[1] is already_upgraded + # third: non-memory tool, untouched + assert bot.additional_tools[2] is other_tool + + def test_empty_tool_list_is_a_no_op(self): + bot = _bare_microbot(ModelProvider.ANTHROPIC, []) + + bot._upgrade_tools_for_provider() + + assert bot.additional_tools == [] + + def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): + tool1 = _memory_tool(tmp_path) + tmp_path2 = tmp_path / "sub" + tmp_path2.mkdir() + tool2 = _memory_tool(tmp_path2) + bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2]) + + with caplog.at_level(logging.INFO, logger=" MicroBot "): + bot._upgrade_tools_for_provider() + + upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] + assert len(upgrade_logs) == 2 + + # -- Non-Anthropic providers: no upgrade should happen ------------------ + + @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) + def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(provider, [tool]) + + bot._upgrade_tools_for_provider() + + assert len(bot.additional_tools) == 1 + assert isinstance(bot.additional_tools[0], MemoryTool) + assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool) + + @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) + def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider): + tool = _memory_tool(tmp_path) + bot = _bare_microbot(provider, [tool]) + + bot._upgrade_tools_for_provider() + + assert bot.additional_tools[0] is tool diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py new file mode 100644 index 0000000..6668bfc --- /dev/null +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -0,0 +1,347 @@ +""" +Unit tests for AnthropicMemoryTool. + +Covers: + - __init__: memory_dir / usage_instructions forwarding and defaults + - is_model_supported + - is_invoked + - clear_all / clear_all_memory (SDK override) + - SDK overrides: view, create, str_replace, insert, delete, rename + (happy-path + RuntimeError on failure) +""" +import logging +import pytest + +from anthropic.types.beta import ( + BetaMemoryTool20250818CreateCommand, + BetaMemoryTool20250818DeleteCommand, + BetaMemoryTool20250818InsertCommand, + BetaMemoryTool20250818RenameCommand, + BetaMemoryTool20250818StrReplaceCommand, + BetaMemoryTool20250818ViewCommand, +) + +from microbots.tools.tool_definitions.anthropic_memory_tool import ( + DEFAULT_MEMORY_INSTRUCTIONS, + AnthropicMemoryTool, +) +from microbots.tools.tool_definitions.memory_tool import MemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_tool(tmp_path) -> AnthropicMemoryTool: + return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) + + +# --------------------------------------------------------------------------- +# __init__ +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInit: + + def test_is_subclass_of_memory_tool(self, tmp_path): + assert isinstance(make_tool(tmp_path), MemoryTool) + + def test_memory_dir_is_forwarded(self, tmp_path): + mem_dir = str(tmp_path / "my_memory") + tool = AnthropicMemoryTool(memory_dir=mem_dir) + assert str(tool._memory_dir) == mem_dir + + def test_memory_dir_is_created_on_init(self, tmp_path): + mem_dir = tmp_path / "new_memory" + assert not mem_dir.exists() + AnthropicMemoryTool(memory_dir=str(mem_dir)) + assert mem_dir.exists() + + def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): + from pathlib import Path + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + tool = AnthropicMemoryTool() + assert tool._memory_dir == tmp_path / ".microbots" / "memory" + + def test_custom_usage_instructions_are_stored(self, tmp_path): + custom = "custom instructions" + tool = AnthropicMemoryTool( + memory_dir=str(tmp_path / "memory"), + usage_instructions=custom, + ) + assert tool.usage_instructions_to_llm == custom + + def test_default_usage_instructions_are_applied_when_none(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS + + +# --------------------------------------------------------------------------- +# is_model_supported +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolIsModelSupported: + + def test_returns_true_for_claude_models(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"): + assert tool.is_model_supported(model) is True + + def test_returns_false_for_non_claude_models(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""): + assert tool.is_model_supported(model) is False + + +# --------------------------------------------------------------------------- +# is_invoked +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolIsInvoked: + + def test_always_returns_false(self, tmp_path): + tool = make_tool(tmp_path) + for cmd in ("memory view /memories", "memory clear", "anything", ""): + assert tool.is_invoked(cmd) is False + + +# --------------------------------------------------------------------------- +# clear_all / clear_all_memory +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolClearAll: + + def test_clear_all_removes_all_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + tool.clear_all() + + assert list(tool._memory_dir.iterdir()) == [] + + def test_clear_all_leaves_memory_dir_intact(self, tmp_path): + tool = make_tool(tmp_path) + tool.clear_all() + assert tool._memory_dir.exists() + + def test_clear_all_logs_info(self, tmp_path, caplog): + tool = make_tool(tmp_path) + with caplog.at_level(logging.INFO): + tool.clear_all() + assert "AnthropicMemoryTool" in caplog.text + assert "cleared" in caplog.text + + def test_clear_all_memory_returns_string(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.clear_all_memory() + assert result == "All memory cleared" + + def test_clear_all_memory_removes_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + tool.clear_all_memory() + + assert list(tool._memory_dir.iterdir()) == [] + + +# --------------------------------------------------------------------------- +# view (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolView: + + def test_view_returns_file_contents(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "notes.md").write_text("hello\nworld\n") + + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/notes.md", view_range=None + ) + result = tool.view(cmd) + + assert "hello" in result + assert "world" in result + + def test_view_with_view_range(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") + + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/f.md", view_range=[2, 4] + ) + result = tool.view(cmd) + + assert "b" in result + assert "d" in result + assert "a" not in result + assert "e" not in result + + def test_view_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818ViewCommand( + command="view", path="/memories/nonexistent.md", view_range=None + ) + with pytest.raises(RuntimeError): + tool.view(cmd) + + +# --------------------------------------------------------------------------- +# create (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolCreate: + + def test_create_writes_file(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818CreateCommand( + command="create", path="/memories/new.md", file_text="hello world" + ) + result = tool.create(cmd) + + assert "new.md" in result + assert (tool._memory_dir / "new.md").read_text() == "hello world" + + def test_create_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + # Path traversal should cause _create to fail via _resolve + cmd = BetaMemoryTool20250818CreateCommand( + command="create", path="/memories/../../etc/evil.md", file_text="x" + ) + with pytest.raises((RuntimeError, ValueError)): + tool.create(cmd) + + +# --------------------------------------------------------------------------- +# str_replace (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolStrReplace: + + def test_str_replace_edits_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + cmd = BetaMemoryTool20250818StrReplaceCommand( + command="str_replace", + path="/memories/f.md", + old_str="hello", + new_str="goodbye", + ) + result = tool.str_replace(cmd) + + assert "f.md" in result + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_str_replace_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + cmd = BetaMemoryTool20250818StrReplaceCommand( + command="str_replace", + path="/memories/f.md", + old_str="not present", + new_str="x", + ) + with pytest.raises(RuntimeError): + tool.str_replace(cmd) + + +# --------------------------------------------------------------------------- +# insert (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInsert: + + def test_insert_prepends_line(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + cmd = BetaMemoryTool20250818InsertCommand( + command="insert", + path="/memories/f.md", + insert_line=0, + insert_text="prepended", + ) + result = tool.insert(cmd) + + assert "0" in result or "prepended" in result or "f.md" in result + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + + def test_insert_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818InsertCommand( + command="insert", + path="/memories/missing.md", + insert_line=0, + insert_text="x", + ) + with pytest.raises(RuntimeError): + tool.insert(cmd) + + +# --------------------------------------------------------------------------- +# delete (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolDelete: + + def test_delete_removes_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + cmd = BetaMemoryTool20250818DeleteCommand( + command="delete", path="/memories/f.md" + ) + tool.delete(cmd) + + assert not (tool._memory_dir / "f.md").exists() + + def test_delete_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818DeleteCommand( + command="delete", path="/memories/nonexistent.md" + ) + with pytest.raises(RuntimeError): + tool.delete(cmd) + + +# --------------------------------------------------------------------------- +# rename (SDK override) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolRename: + + def test_rename_moves_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "old.md").write_text("content") + + cmd = BetaMemoryTool20250818RenameCommand( + command="rename", + old_path="/memories/old.md", + new_path="/memories/new.md", + ) + tool.rename(cmd) + + assert not (tool._memory_dir / "old.md").exists() + assert (tool._memory_dir / "new.md").read_text() == "content" + + def test_rename_raises_runtime_error_on_failure(self, tmp_path): + tool = make_tool(tmp_path) + cmd = BetaMemoryTool20250818RenameCommand( + command="rename", + old_path="/memories/missing.md", + new_path="/memories/new.md", + ) + with pytest.raises(RuntimeError): + tool.rename(cmd) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 2c317f6..5679d5e 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -98,6 +98,20 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path): with pytest.raises(ValueError): tool._resolve(bad) + def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path): + """The else branch: a path without a /memories/ prefix is resolved + relative to memory_dir.""" + tool = make_tool(tmp_path) + resolved = tool._resolve("notes.md") + assert resolved == (tool._memory_dir / "notes.md").resolve() + + def test_resolve_bare_relative_subdir_path(self, tmp_path): + """A bare relative path with subdirectory components is also resolved + relative to memory_dir (else branch).""" + tool = make_tool(tmp_path) + resolved = tool._resolve("sub/dir/file.md") + assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve() + # --------------------------------------------------------------------------- # _view @@ -231,6 +245,13 @@ def test_str_replace_missing_flags_returns_error(self, tmp_path): result = tool._str_replace(["/memories/f.md"]) assert result.return_code != 0 + def test_str_replace_empty_args_returns_usage_error(self, tmp_path): + """if not args branch: calling _str_replace([]) returns the usage message.""" + tool = make_tool(tmp_path) + result = tool._str_replace([]) + assert result.return_code == 1 + assert "Usage: memory str_replace" in result.stderr + def test_str_replace_nonexistent_file_returns_error(self, tmp_path): tool = make_tool(tmp_path) result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) @@ -284,6 +305,13 @@ def test_insert_missing_flags_returns_error(self, tmp_path): result = tool._insert(["/memories/f.md"]) assert result.return_code != 0 + def test_insert_empty_args_returns_usage_error(self, tmp_path): + """if not args branch: calling _insert([]) returns the usage message.""" + tool = make_tool(tmp_path) + result = tool._insert([]) + assert result.return_code == 1 + assert "Usage: memory insert" in result.stderr + # --------------------------------------------------------------------------- # _delete @@ -394,6 +422,19 @@ def test_clear_leaves_memory_dir_intact(self, tmp_path): assert tool._memory_dir.exists() +# --------------------------------------------------------------------------- +# is_model_supported +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsModelSupported: + + def test_returns_true_for_any_model(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("gpt-4", "claude-3-sonnet", "ollama/llama3", ""): + assert tool.is_model_supported(model) is True + + # --------------------------------------------------------------------------- # invoke — full command dispatch # --------------------------------------------------------------------------- @@ -444,6 +485,69 @@ def test_invoke_handles_bad_quoting_gracefully(self, tmp_path): assert result.return_code != 0 assert "Parse error" in result.stderr + def test_invoke_str_replace_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool.invoke( + 'memory str_replace /memories/f.md --old "hello" --new "goodbye"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_invoke_insert_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool.invoke( + 'memory insert /memories/f.md --line 0 --text "prepended"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + + def test_invoke_delete_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool.invoke("memory delete /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert not f.exists() + + def test_invoke_rename_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "old.md").write_text("content") + + result = tool.invoke( + "memory rename /memories/old.md /memories/new.md", + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "new.md").read_text() == "content" + assert not (tool._memory_dir / "old.md").exists() + + def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path): + """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand + are caught and returned as a CmdReturn with return_code=1.""" + tool = make_tool(tmp_path) + + # str_replace on a non-existent file raises FileNotFoundError + result = tool.invoke( + 'memory str_replace /memories/missing.md --old "x" --new "y"', + parent_bot=Mock(), + ) + + assert result.return_code == 1 + assert result.stdout == "" + assert result.stderr != "" + if __name__ == "__main__": pytest.main([__file__, "-v"]) From b4d753e938ac4861970427068253df19009f8969 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 5 Mar 2026 09:23:30 +0000 Subject: [PATCH 07/20] Add tests for coverage --- .../test_anthropic_memory_tool.py | 15 +++++-- .../tool_definitions/test_memory_tool.py | 40 ++++++++++++++++--- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index 6668bfc..db7069e 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -209,13 +209,20 @@ def test_create_writes_file(self, tmp_path): assert (tool._memory_dir / "new.md").read_text() == "hello world" def test_create_raises_runtime_error_on_failure(self, tmp_path): + """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by + mocking _create to return a non-zero CmdReturn.""" + from unittest.mock import patch + from microbots.environment.Environment import CmdReturn + tool = make_tool(tmp_path) - # Path traversal should cause _create to fail via _resolve cmd = BetaMemoryTool20250818CreateCommand( - command="create", path="/memories/../../etc/evil.md", file_text="x" + command="create", path="/memories/new.md", file_text="x" ) - with pytest.raises((RuntimeError, ValueError)): - tool.create(cmd) + with patch.object( + tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1) + ): + with pytest.raises(RuntimeError, match="disk full"): + tool.create(cmd) # --------------------------------------------------------------------------- diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 5679d5e..089ca52 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -166,6 +166,14 @@ def test_view_no_args_returns_error(self, tmp_path): result = tool._view([]) assert result.return_code != 0 + def test_view_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello\n") + result = tool._view(["/memories/f.md", "--bogus", "value"]) + assert result.return_code == 0 + assert "hello" in result.stdout + # --------------------------------------------------------------------------- # _create @@ -257,6 +265,16 @@ def test_str_replace_nonexistent_file_returns_error(self, tmp_path): result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) assert result.return_code != 0 + def test_str_replace_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + result = tool._str_replace([ + "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye" + ]) + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + # --------------------------------------------------------------------------- # _insert @@ -312,6 +330,17 @@ def test_insert_empty_args_returns_usage_error(self, tmp_path): assert result.return_code == 1 assert "Usage: memory insert" in result.stderr + def test_insert_unknown_flag_is_skipped(self, tmp_path): + """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + result = tool._insert([ + "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended" + ]) + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + # --------------------------------------------------------------------------- # _delete @@ -534,19 +563,20 @@ def test_invoke_rename_subcommand(self, tmp_path): assert not (tool._memory_dir / "old.md").exists() def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path): - """ValueError/FileNotFoundError/RuntimeError raised inside a subcommand - are caught and returned as a CmdReturn with return_code=1.""" + """except (ValueError, FileNotFoundError, RuntimeError) block: + a path-traversal path causes _resolve() to raise ValueError inside a + subcommand handler, which is caught and returned as CmdReturn(return_code=1).""" tool = make_tool(tmp_path) - # str_replace on a non-existent file raises FileNotFoundError + # Path traversal triggers ValueError inside _view → caught by except block result = tool.invoke( - 'memory str_replace /memories/missing.md --old "x" --new "y"', + "memory view /memories/../../etc/passwd", parent_bot=Mock(), ) assert result.return_code == 1 assert result.stdout == "" - assert result.stderr != "" + assert "traversal" in result.stderr.lower() or result.stderr != "" if __name__ == "__main__": From 579e6a349355077676ea59211fe7ffc2f7ad6dcd Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Tue, 10 Mar 2026 11:54:18 +0000 Subject: [PATCH 08/20] move tool dispatch out of AnthropicApi into MicroBot run loop --- pytest.ini | 1 + src/microbots/MicroBot.py | 39 +- src/microbots/llm/anthropic_api.py | 117 +++-- src/microbots/llm/llm.py | 9 + .../tool_definitions/anthropic_memory_tool.py | 37 +- test/bot/test_memory_tool_integration.py | 439 ++++++++++++++++++ test/bot/test_upgrade_tools_for_provider.py | 110 ++--- test/llm/test_anthropic_api.py | 289 +++++------- .../test_anthropic_memory_tool.py | 14 +- 9 files changed, 710 insertions(+), 345 deletions(-) create mode 100644 test/bot/test_memory_tool_integration.py diff --git a/pytest.ini b/pytest.ini index d0e4f1f..80758ec 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,5 +11,6 @@ addopts = markers = unit: Unit tests integration: Integration tests + anthropic_integration: Integration tests requiring a real Anthropic API key slow: Slow tests docker: marks tests that require a running Docker daemon and pull container images diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 7b061d7..f2708b1 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -317,38 +317,7 @@ def _create_environment(self, folder_to_mount: Optional[Mount]): folder_to_mount=folder_to_mount, ) - def _upgrade_tools_for_provider(self): - """Auto-upgrade provider-agnostic tools to their provider-optimised variants. - - Currently: replaces any ``MemoryTool`` with ``AnthropicMemoryTool`` when - the provider is Anthropic so the model gets native structured tool-use - instead of the text-command loop. The ``memory_dir`` and any custom - ``usage_instructions_to_llm`` are forwarded to the upgraded instance. - """ - if self.model_provider != ModelProvider.ANTHROPIC: - return - - # Local imports to avoid pulling Anthropic SDK into non-Anthropic paths - from microbots.tools.tool_definitions.memory_tool import MemoryTool - from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool - - upgraded = [] - for tool in self.additional_tools: - if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): - logger.info( - "🧠 Auto-upgrading MemoryTool → AnthropicMemoryTool for Anthropic provider" - ) - upgraded.append(AnthropicMemoryTool( - memory_dir=tool.memory_dir, - usage_instructions=tool.usage_instructions_to_llm, - )) - else: - upgraded.append(tool) - self.additional_tools = upgraded - def _create_llm(self): - self._upgrade_tools_for_provider() - # Append tool usage instructions to system prompt system_prompt_with_tools = self.system_prompt if self.system_prompt else "" if self.additional_tools: @@ -365,16 +334,10 @@ def _create_llm(self): system_prompt=system_prompt_with_tools, model_name=self.deployment_name ) elif self.model_provider == ModelProvider.ANTHROPIC: - # Detect Anthropic-native tools (e.g. AnthropicMemoryTool) by duck-typing: - # any tool that exposes both to_dict() and call() is a native Anthropic tool. - native_tools = [ - t for t in self.additional_tools - if callable(getattr(t, "to_dict", None)) and callable(getattr(t, "call", None)) - ] self.llm = AnthropicApi( system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name, - native_tools=native_tools or None, + additional_tools=self.additional_tools, ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index 4a73ba5..a403f9c 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -21,12 +21,31 @@ class AnthropicApi(LLMInterface): + def upgrade_tools(self, tools: list) -> list: + """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use.""" + from microbots.tools.tool_definitions.memory_tool import MemoryTool + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + upgraded = [] + for tool in tools: + if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): + logger.info( + "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider" + ) + upgraded.append(AnthropicMemoryTool( + memory_dir=tool.memory_dir, + usage_instructions=tool.usage_instructions_to_llm, + )) + else: + upgraded.append(tool) + return upgraded + def __init__( self, system_prompt: str, deployment_name: str = deployment_name, max_retries: int = 3, - native_tools: Optional[List] = None, + additional_tools: Optional[List] = None, ): """ Parameters @@ -37,11 +56,10 @@ def __init__( The Anthropic model deployment name. max_retries : int Maximum number of retries for invalid LLM responses. - native_tools : Optional[List] - Anthropic-native tool objects (e.g. ``AnthropicMemoryTool``) that - have both a ``to_dict()`` and a ``call()`` method. These are passed - directly to the API and their tool-use blocks are dispatched here - before the JSON response is returned to the caller. + additional_tools : Optional[List] + Tool objects passed from MicroBot. Any provider-agnostic tools + (e.g. ``MemoryTool``) are silently upgraded to their Anthropic- + native variants, and their API schemas are extracted. """ self.ai_client = Anthropic( api_key=api_key, @@ -50,12 +68,18 @@ def __init__( self.deployment_name = deployment_name self.system_prompt = system_prompt self.messages = [] - self.native_tools = native_tools or [] - # Cache tool dicts once so _call_api and _dispatch_tool_use don't - # re-serialise on every invocation (important when multiple native - # tools are registered, e.g. memory + bash). - self._native_tool_dicts = [t.to_dict() for t in self.native_tools] - self._native_tools_by_name = {d["name"]: t for d, t in zip(self._native_tool_dicts, self.native_tools)} + + # Silently upgrade tools in-place and extract API schemas + tools = additional_tools or [] + upgraded = self.upgrade_tools(tools) + # Mutate the original list so the caller (MicroBot) sees upgraded tools + if additional_tools is not None: + additional_tools[:] = upgraded + self._tool_dicts = [ + t.to_dict() for t in upgraded + if callable(getattr(t, "to_dict", None)) + ] + self._pending_tool_response = None # Set these values here. This logic will be handled in the parent class. self.max_retries = max_retries @@ -66,7 +90,7 @@ def __init__( # ---------------------------------------------------------------------- # def _call_api(self) -> object: - """Call the Anthropic messages API, including native tools when present.""" + """Call the Anthropic messages API, including tool definitions when present.""" kwargs = dict( model=self.deployment_name, system=self.system_prompt, @@ -74,44 +98,24 @@ def _call_api(self) -> object: max_tokens=4096, ) - if self.native_tools: - kwargs["tools"] = self._native_tool_dicts + if self._tool_dicts: + kwargs["tools"] = self._tool_dicts return self.ai_client.messages.create(**kwargs) - def _dispatch_tool_use(self, response) -> None: - """Handle a tool_use response: execute each tool call and append results. + def _append_tool_result(self, response, result_text: str) -> None: + """Append the assistant tool_use turn and the corresponding tool_result user turn. - Mutates ``self.messages`` in place — appends the assistant turn (with - all content blocks) and the corresponding tool_result user turn. + Called when the caller provides the tool execution result via + the next ``ask()`` call. """ - # Append the full assistant message as-is (content is a list of blocks) assistant_content = [block.model_dump() for block in response.content] self.messages.append({"role": "assistant", "content": assistant_content}) - # Build tool_result entries for every tool_use block tool_results = [] for block in response.content: if block.type != "tool_use": continue - - # Find the matching native tool by name - tool = self._native_tools_by_name.get(block.name) - if tool is None: - result_text = f"Error: unknown tool '{block.name}'" - logger.error("Received tool_use for unknown tool: %s", block.name) - else: - try: - result_text = tool.call(block.input) - logger.info( - "🧠 Native tool '%s' executed. Result (first 200 chars): %s", - block.name, - str(result_text)[:200], - ) - except Exception as exc: - result_text = f"Error executing tool '{block.name}': {exc}" - logger.error("Native tool '%s' raised: %s", block.name, exc) - tool_results.append({ "type": "tool_result", "tool_use_id": block.id, @@ -127,18 +131,39 @@ def _dispatch_tool_use(self, response) -> None: def ask(self, message: str) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. - self.messages.append({"role": "user", "content": message}) + if self._pending_tool_response: + # Previous response was tool_use — format this message as tool results. + self._append_tool_result(self._pending_tool_response, message) + self._pending_tool_response = None + else: + self.messages.append({"role": "user", "content": message}) valid = False while not valid: response = self._call_api() - # Dispatch any tool_use rounds before looking for a JSON response. - # The model may call the memory tool multiple times before producing - # its final JSON command. - while response.stop_reason == "tool_use": - self._dispatch_tool_use(response) - response = self._call_api() + if response.stop_reason == "tool_use": + # Return tool call info as an LLMAskResponse so the + # caller (MicroBot.run) can dispatch the tool. + self._pending_tool_response = response + + thoughts = "" + for block in response.content: + if block.type == "text": + thoughts = block.text + break + + tool_calls = [] + for block in response.content: + if block.type == "tool_use": + tool_calls.append({ + "name": block.name, + "id": block.id, + "input": block.input, + }) + + command = json.dumps({"native_tool_calls": tool_calls}) + return LLMAskResponse(task_done=False, thoughts=thoughts, command=command) # Extract text content from the final response response_text = "" diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py index 2800790..e7e5c22 100644 --- a/src/microbots/llm/llm.py +++ b/src/microbots/llm/llm.py @@ -29,6 +29,15 @@ def ask(self, message: str) -> LLMAskResponse: def clear_history(self) -> bool: pass + def upgrade_tools(self, tools: list) -> list: + """Upgrade tools for the specific LLM provider. + + The default implementation is a no-op. Subclasses (e.g. + ``AnthropicApi``) override this to swap provider-agnostic tools + with their native equivalents. + """ + return tools + def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]: if self.retries >= self.max_retries: diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py index 9cf547e..ef26182 100644 --- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -21,8 +21,9 @@ The memory tool (type ``memory_20250818``) is available in the standard Anthropic library and does not require a beta endpoint or header. Pass it via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular -``client.messages.create(...)`` call. ``AnthropicApi`` handles this -automatically when ``native_tools`` contains an ``AnthropicMemoryTool``. +``client.messages.create(...)`` call. ``MicroBot`` auto-upgrades +``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and +passes the tool schema to ``AnthropicApi`` via ``tool_dicts``. Usage: from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool @@ -33,6 +34,7 @@ from __future__ import annotations +import json from logging import getLogger from pathlib import Path @@ -48,6 +50,7 @@ BetaMemoryTool20250818ViewCommand, ) +from microbots.environment.Environment import CmdReturn from microbots.tools.tool_definitions.memory_tool import MemoryTool logger = getLogger(__name__) @@ -116,10 +119,36 @@ def is_model_supported(self, model_name: str) -> bool: return "claude" in model_name.lower() def is_invoked(self, command: str) -> bool: - """Return False — this tool is dispatched natively by AnthropicApi, - not via the shell command loop.""" + """Return True when the command is a serialized native_tool_calls JSON + containing a call to the ``memory`` tool.""" + try: + data = json.loads(command) + if "native_tool_calls" in data: + return any(tc["name"] == "memory" for tc in data["native_tool_calls"]) + except (json.JSONDecodeError, KeyError, TypeError): + pass return False + def invoke(self, command: str, parent_bot) -> CmdReturn: + """Execute all memory tool calls in the serialized native_tool_calls batch.""" + data = json.loads(command) + results = [] + for tc in data["native_tool_calls"]: + if tc["name"] != "memory": + continue + try: + result = self.call(tc["input"]) + logger.info( + "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s", + str(result)[:200], + ) + results.append(str(result)) + except Exception as exc: + logger.error("Native tool 'memory' raised: %s", exc) + results.append(f"Error executing tool 'memory': {exc}") + combined = "\n".join(results) + return CmdReturn(stdout=combined, stderr="", return_code=0) + def clear_all(self) -> None: """Delete all memory files (useful for testing or resetting state).""" self._clear() diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py new file mode 100644 index 0000000..25c717d --- /dev/null +++ b/test/bot/test_memory_tool_integration.py @@ -0,0 +1,439 @@ +"""Tests for the Anthropic memory tool end-to-end flow. + +Unit tests (mocked API): + Verify wiring — auto-upgrade, tool dispatch, and memory file operations + with a mocked Anthropic client. Fast, free, no API key needed. + +Integration tests (real API): + Hit the actual Anthropic API to verify the full round-trip. + Gated behind ``@pytest.mark.anthropic_integration``. + Require ``ANTHROPIC_API_KEY`` in .env. +""" + +import json +import os +import sys +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +from dotenv import load_dotenv + +load_dotenv() + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +from microbots import MicroBot, BotRunResult +from microbots.llm.llm import llm_output_format_str +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""): + """Build a mock Anthropic API response with stop_reason='tool_use'.""" + blocks = [] + + if thinking_text: + text_block = Mock() + text_block.type = "text" + text_block.text = thinking_text + blocks.append(text_block) + + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = tool_name + tool_block.id = tool_id + tool_block.input = tool_input + tool_block.model_dump = Mock(return_value={ + "type": "tool_use", + "id": tool_id, + "name": tool_name, + "input": tool_input, + }) + blocks.append(tool_block) + + resp = Mock() + resp.stop_reason = "tool_use" + resp.content = blocks + return resp + + +def _make_end_turn_response(task_done, thoughts, command=""): + """Build a mock Anthropic API response with stop_reason='end_turn'.""" + payload = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + + text_block = Mock() + text_block.type = "text" + text_block.text = payload + + resp = Mock() + resp.stop_reason = "end_turn" + resp.content = [text_block] + return resp + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolWiring: + """Unit tests — mocked Anthropic client, real tool dispatch and file ops.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + """Create a MicroBot with Anthropic provider and a MemoryTool. + + The Anthropic client is mocked, but the rest of the stack is real: + auto-upgrade, tool dispatch, and memory file operations. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + anthropic_deployment = "claude-sonnet-4-5" + + with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \ + patch("microbots.llm.anthropic_api.api_key", "test-key"), \ + patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \ + patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment): + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_anthropic_cls.return_value + yield bot + del bot + + # -- Upgrade verification ----------------------------------------------- + + def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot): + """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool.""" + upgraded_tools = bot.additional_tools + memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_tool_dicts_include_memory_schema(self, bot): + """The LLM should have received the memory tool schema.""" + assert len(bot.llm._tool_dicts) == 1 + assert bot.llm._tool_dicts[0]["type"] == "memory_20250818" + + # -- Create file via tool_use ------------------------------------------- + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + # Sequence: + # 1. ask(task) → API returns tool_use (memory create) + # 2. ask(tool_result) → API returns end_turn (task_done=True) + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_001", + tool_input={ + "command": "create", + "path": "/memories/notes.md", + "file_text": "Hello from integration test", + }, + thinking_text="I'll save a note to memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from integration test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # Verify the file was actually created on disk + # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md + created_file = memory_dir / "notes.md" + assert created_file.exists(), f"Expected {created_file} to be created" + assert created_file.read_text() == "Hello from integration test" + + # -- View file via tool_use --------------------------------------------- + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + # Pre-create a file in memory + # _resolve("/memories/existing.md") → memory_dir/existing.md + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_002", + tool_input={ + "command": "view", + "path": "/memories/existing.md", + }, + thinking_text="Let me check my memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Found previously saved content in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + # Verify the view result was passed back to the API as tool_result + calls = self._mock_client.messages.create.call_args_list + assert len(calls) == 2 + # The second call should have messages including the tool_result + second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", []) + tool_result_msgs = [ + m for m in second_call_messages + if m.get("role") == "user" and isinstance(m.get("content"), list) + and any(c.get("type") == "tool_result" for c in m["content"]) + ] + assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call" + # The tool_result content should contain the file content + tool_result_content = tool_result_msgs[-1]["content"][0]["content"] + assert "Previously saved content" in tool_result_content + + # -- Multiple tool calls in sequence ------------------------------------ + + def test_create_then_view_memory_file(self, bot, memory_dir): + """LLM creates a file, then views it — both dispatched via MicroBot loop.""" + self._mock_client.messages.create.side_effect = [ + # Step 1: create file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_003", + tool_input={ + "command": "create", + "path": "/memories/todo.md", + "file_text": "- Fix bug #42\n- Write tests", + }, + thinking_text="Creating a todo list.", + ), + # Step 2: view file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_004", + tool_input={ + "command": "view", + "path": "/memories/todo.md", + }, + thinking_text="Let me verify what I wrote.", + ), + # Step 3: done + _make_end_turn_response( + task_done=True, + thoughts="Created and verified the todo list.", + ), + ] + + result: BotRunResult = bot.run( + "Create a todo list and verify it was saved", + max_iterations=10, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # File should exist with correct content + created_file = memory_dir / "todo.md" + assert created_file.exists() + assert "Fix bug #42" in created_file.read_text() + + # -- Non-memory commands still go to environment ------------------------ + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment, not the memory tool.""" + self._mock_client.messages.create.side_effect = [ + _make_end_turn_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_end_turn_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + # The environment.execute should have been called with "ls -la" + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# Real integration tests — require ANTHROPIC_API_KEY +# --------------------------------------------------------------------------- + +MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool. +You can save and retrieve notes using the memory tool. +All your responses must be in this JSON format: +{llm_output_format_str} +The properties (task_done, thoughts, command) are mandatory on each response. +When you are done, set task_done to true and command to an empty string. +""" + + +@pytest.mark.anthropic_integration +@pytest.mark.docker +class TestMemoryToolRealApi: + """End-to-end integration tests that hit the real Anthropic API. + + These tests exercise the full MicroBot → AnthropicApi → memory tool + pipeline with no mocking. A real Docker environment is created + (matching the AgentBoss integration test pattern). + + Run with:: + + pytest -m anthropic_integration + + Requires ``ANTHROPIC_API_KEY`` in ``.env``. + """ + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def memory_bot(self, memory_dir): + """Create a MicroBot with the real Anthropic API, real Docker env, + and a MemoryTool. No mocking — fully end-to-end. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5") + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt=MEMORY_SYSTEM_PROMPT, + additional_tools=[tool], + ) + + yield bot + del bot + + def test_memory_tool_auto_upgraded(self, memory_bot): + """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool.""" + memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_create_memory_file(self, memory_bot, memory_dir): + """MicroBot should persist a debugging plan to memory. + + The LLM is expected to: + 1. Receive a task about planning a debugging session. + 2. Decide to persist the plan using the memory tool. + 3. Confirm the task is done. + + We verify the plan was actually written to disk. + """ + result: BotRunResult = memory_bot.run( + task=( + "You are investigating a bug where the server returns HTTP 500 " + "on POST /api/users. Create a debugging plan that includes: " + "1) check server logs, 2) reproduce the request with curl, " + "3) inspect the database connection. " + "Persist this plan so you can resume later if interrupted." + ), + max_iterations=10, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have used the memory tool to persist the plan + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file created in memory. " + f"Found: {saved_files}" + ) + combined_content = "\n".join(f.read_text() for f in saved_files).lower() + assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, ( + f"Expected debugging plan content in memory files. Content: {combined_content}" + ) + + def test_create_and_view_roundtrip(self, memory_bot, memory_dir): + """MicroBot should save findings and then review them before reporting. + + The LLM is expected to: + 1. Record analysis findings using the memory tool. + 2. Review what it recorded to verify nothing was missed. + 3. Summarize the findings in its final thoughts. + + We verify: + - At least one file was written to disk. + - The LLM's summary references the recorded findings. + """ + result: BotRunResult = memory_bot.run( + task=( + "You analyzed a Python project and found these issues: " + "1) an unused import 'os' in utils.py, " + "2) a missing null check in handler.py line 42. " + "Record these findings, then review your notes and " + "summarize what you found in your final thoughts." + ), + max_iterations=15, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have created at least one memory file + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file in memory. " + f"Found: {list(memory_dir.rglob('*'))}" + ) + + result_lower = result.result.lower() + assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, ( + f"LLM should have summarized the findings. Got: {result.result}" + ) diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py index f5d1e98..459afd4 100644 --- a/test/bot/test_upgrade_tools_for_provider.py +++ b/test/bot/test_upgrade_tools_for_provider.py @@ -1,13 +1,7 @@ -""" -Unit tests for MicroBot._upgrade_tools_for_provider. +"""Unit tests for AnthropicApi.upgrade_tools() method. These tests verify that plain ``MemoryTool`` instances are automatically -replaced with ``AnthropicMemoryTool`` when the model provider is Anthropic, -and that no changes are made for other providers or other tool types. - -All tests bypass the heavy MicroBot constructor (Docker environment, LLM -creation) by constructing an uninitialized instance with ``object.__new__`` -and manually setting only the attributes the method under test needs. +replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``. """ import sys import os @@ -17,8 +11,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) -from microbots.MicroBot import MicroBot -from microbots.constants import ModelProvider +from microbots.llm.anthropic_api import AnthropicApi from microbots.tools.tool_definitions.memory_tool import MemoryTool from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool @@ -27,15 +20,6 @@ # Helpers # --------------------------------------------------------------------------- -def _bare_microbot(model_provider: str, tools: list) -> MicroBot: - """Return an uninitialized MicroBot with only the attributes that - ``_upgrade_tools_for_provider`` inspects.""" - bot = object.__new__(MicroBot) - bot.model_provider = model_provider - bot.additional_tools = list(tools) - return bot - - def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: return MemoryTool( memory_dir=str(tmp_path / "memory"), @@ -50,112 +34,84 @@ def _memory_tool(tmp_path, instructions: str = "default instructions") -> Memory @pytest.mark.unit class TestUpgradeToolsForProvider: - # -- Anthropic provider: MemoryTool → AnthropicMemoryTool --------------- + @pytest.fixture(autouse=True) + def _create_api(self): + with patch("microbots.llm.anthropic_api.Anthropic"): + self.api = AnthropicApi(system_prompt="test") + + # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool -------- def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): tool = _memory_tool(tmp_path) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - assert len(bot.additional_tools) == 1 - assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) + assert len(upgraded) == 1 + assert isinstance(upgraded[0], AnthropicMemoryTool) def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): mem_dir = str(tmp_path / "my_memory") tool = MemoryTool(memory_dir=mem_dir) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - upgraded = bot.additional_tools[0] - assert isinstance(upgraded, AnthropicMemoryTool) - assert str(upgraded.memory_dir) == mem_dir + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert str(upgraded[0].memory_dir) == mem_dir def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): custom_instructions = "custom memory instructions for test" tool = _memory_tool(tmp_path, instructions=custom_instructions) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([tool]) - upgraded = bot.additional_tools[0] - assert upgraded.usage_instructions_to_llm == custom_instructions + assert upgraded[0].usage_instructions_to_llm == custom_instructions def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [existing]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([existing]) - assert len(bot.additional_tools) == 1 - assert bot.additional_tools[0] is existing + assert len(upgraded) == 1 + assert upgraded[0] is existing def test_non_memory_tools_are_kept_unchanged(self, tmp_path): other_tool = Mock() other_tool.__class__ = Mock # not a MemoryTool subclass - bot = _bare_microbot(ModelProvider.ANTHROPIC, [other_tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([other_tool]) - assert len(bot.additional_tools) == 1 - assert bot.additional_tools[0] is other_tool + assert len(upgraded) == 1 + assert upgraded[0] is other_tool def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): plain_memory = _memory_tool(tmp_path) already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) other_tool = Mock(spec=[]) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [plain_memory, already_upgraded, other_tool]) - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool]) - assert len(bot.additional_tools) == 3 + assert len(upgraded) == 3 # first: should have been upgraded - assert isinstance(bot.additional_tools[0], AnthropicMemoryTool) - assert bot.additional_tools[0] is not plain_memory + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert upgraded[0] is not plain_memory # second: already AnthropicMemoryTool, untouched - assert bot.additional_tools[1] is already_upgraded + assert upgraded[1] is already_upgraded # third: non-memory tool, untouched - assert bot.additional_tools[2] is other_tool + assert upgraded[2] is other_tool def test_empty_tool_list_is_a_no_op(self): - bot = _bare_microbot(ModelProvider.ANTHROPIC, []) - - bot._upgrade_tools_for_provider() + upgraded = self.api.upgrade_tools([]) - assert bot.additional_tools == [] + assert upgraded == [] def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): tool1 = _memory_tool(tmp_path) tmp_path2 = tmp_path / "sub" tmp_path2.mkdir() tool2 = _memory_tool(tmp_path2) - bot = _bare_microbot(ModelProvider.ANTHROPIC, [tool1, tool2]) - with caplog.at_level(logging.INFO, logger=" MicroBot "): - bot._upgrade_tools_for_provider() + with caplog.at_level(logging.INFO): + self.api.upgrade_tools([tool1, tool2]) upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] assert len(upgrade_logs) == 2 - - # -- Non-Anthropic providers: no upgrade should happen ------------------ - - @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) - def test_no_upgrade_for_non_anthropic_provider(self, tmp_path, provider): - tool = _memory_tool(tmp_path) - bot = _bare_microbot(provider, [tool]) - - bot._upgrade_tools_for_provider() - - assert len(bot.additional_tools) == 1 - assert isinstance(bot.additional_tools[0], MemoryTool) - assert not isinstance(bot.additional_tools[0], AnthropicMemoryTool) - - @pytest.mark.parametrize("provider", [ModelProvider.OPENAI, ModelProvider.OLLAMA_LOCAL]) - def test_original_tool_identity_preserved_for_non_anthropic(self, tmp_path, provider): - tool = _memory_tool(tmp_path) - bot = _bare_microbot(provider, [tool]) - - bot._upgrade_tools_for_provider() - - assert bot.additional_tools[0] is tool diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 49674aa..2c11966 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -540,63 +540,37 @@ def test_anthropic_api_clear_history_integration(self): # ============================================================================ @pytest.mark.unit -class TestAnthropicApiNativeToolsInit: - """Tests for __init__ native_tools caching.""" +class TestAnthropicApiToolDictsInit: + """Tests for __init__ tool upgrade and tool_dicts extraction.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass - def test_init_without_native_tools_has_empty_caches(self): + def test_init_without_additional_tools_has_empty_tool_dicts(self): api = AnthropicApi(system_prompt="test") - assert api.native_tools == [] - assert api._native_tool_dicts == [] - assert api._native_tools_by_name == {} + assert api._tool_dicts == [] - def test_init_with_none_native_tools_has_empty_caches(self): - api = AnthropicApi(system_prompt="test", native_tools=None) + def test_init_with_none_additional_tools_has_empty_tool_dicts(self): + api = AnthropicApi(system_prompt="test", additional_tools=None) - assert api._native_tool_dicts == [] - assert api._native_tools_by_name == {} + assert api._tool_dicts == [] - def test_init_with_single_native_tool_caches_dict(self): + def test_init_with_tool_having_to_dict_extracts_dicts(self): tool = Mock() tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + # Ensure it's not a MemoryTool so upgrade_tools won't touch it + tool.__class__ = Mock - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + api = AnthropicApi(system_prompt="test", additional_tools=[tool]) - assert api._native_tool_dicts == [{"name": "memory", "type": "memory_20250818"}] + assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}] - def test_init_with_single_native_tool_caches_by_name(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - assert "memory" in api._native_tools_by_name - assert api._native_tools_by_name["memory"] is tool - - def test_init_with_multiple_native_tools_caches_all(self): - tool1 = Mock() - tool1.to_dict.return_value = {"name": "memory"} - tool2 = Mock() - tool2.to_dict.return_value = {"name": "bash"} - - api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) - - assert len(api._native_tool_dicts) == 2 - assert api._native_tools_by_name["memory"] is tool1 - assert api._native_tools_by_name["bash"] is tool2 - - def test_init_calls_to_dict_exactly_once_per_tool(self): - """to_dict() must not be called again on subsequent API calls.""" - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - - AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_init_sets_pending_tool_response_to_none(self): + api = AnthropicApi(system_prompt="test") - assert tool.to_dict.call_count == 1 + assert api._pending_tool_response is None @pytest.mark.unit @@ -617,10 +591,10 @@ def test_call_api_without_tools_omits_tools_kwarg(self): call_kwargs = api.ai_client.messages.create.call_args[1] assert "tools" not in call_kwargs - def test_call_api_with_tools_passes_cached_dicts(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} - api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) + def test_call_api_with_tool_dicts_passes_them(self): + dicts = [{"name": "memory", "type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api._tool_dicts = dicts api.messages = [{"role": "user", "content": "hello"}] api.ai_client.messages.create = Mock(return_value=Mock()) @@ -628,35 +602,17 @@ def test_call_api_with_tools_passes_cached_dicts(self): call_kwargs = api.ai_client.messages.create.call_args[1] assert "tools" in call_kwargs - assert call_kwargs["tools"] == [{"name": "memory", "type": "memory_20250818"}] - - def test_call_api_does_not_call_to_dict_again(self): - """to_dict() should only be called during __init__, never during _call_api.""" - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - api = AnthropicApi(system_prompt="test", deployment_name="claude-3", native_tools=[tool]) - api.messages = [{"role": "user", "content": "hello"}] - api.ai_client.messages.create = Mock(return_value=Mock()) - - count_after_init = tool.to_dict.call_count # should be 1 - api._call_api() - api._call_api() - - assert tool.to_dict.call_count == count_after_init # no increase + assert call_kwargs["tools"] == dicts @pytest.mark.unit -class TestAnthropicApiDispatchToolUse: - """Tests for _dispatch_tool_use.""" +class TestAnthropicApiAppendToolResult: + """Tests for _append_tool_result.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass - # ------------------------------------------------------------------ # - # Helpers - # ------------------------------------------------------------------ # - @staticmethod def _tool_use_block(name, tool_id="tu_001", input_data=None): block = Mock() @@ -675,31 +631,21 @@ def _text_block(text="hello"): block.model_dump.return_value = {"type": "text", "text": text} return block - # ------------------------------------------------------------------ # - # Tests - # ------------------------------------------------------------------ # - - def test_dispatch_appends_assistant_message_first(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_appends_assistant_message_first(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [self._tool_use_block("memory")] - api._dispatch_tool_use(response) + api._append_tool_result(response, "ok") assert api.messages[0]["role"] == "assistant" - def test_dispatch_appends_tool_result_user_message(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "file listing" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_appends_tool_result_user_message(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [self._tool_use_block("memory", tool_id="tu_abc")] - api._dispatch_tool_use(response) + api._append_tool_result(response, "file listing") user_msg = api.messages[1] assert user_msg["role"] == "user" @@ -707,100 +653,53 @@ def test_dispatch_appends_tool_result_user_message(self): assert user_msg["content"][0]["tool_use_id"] == "tu_abc" assert user_msg["content"][0]["content"] == "file listing" - def test_dispatch_calls_tool_with_correct_input(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - input_data = {"command": "view", "path": "/memories"} - response = Mock() - response.content = [self._tool_use_block("memory", input_data=input_data)] - api._dispatch_tool_use(response) - - tool.call.assert_called_once_with(input_data) - - def test_dispatch_unknown_tool_returns_error_in_result(self): - api = AnthropicApi(system_prompt="test") # no native tools - - response = Mock() - response.content = [self._tool_use_block("unknown_tool", tool_id="tu_err")] - api._dispatch_tool_use(response) - - content = api.messages[1]["content"][0]["content"] - assert "Error" in content - assert "unknown_tool" in content - - def test_dispatch_tool_exception_returns_error_message(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.side_effect = RuntimeError("disk full") - api = AnthropicApi(system_prompt="test", native_tools=[tool]) - - response = Mock() - response.content = [self._tool_use_block("memory", tool_id="tu_exc")] - api._dispatch_tool_use(response) - - content = api.messages[1]["content"][0]["content"] - assert "Error" in content - assert "disk full" in content - - def test_dispatch_skips_non_tool_use_content_blocks(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "result" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_skips_non_tool_use_content_blocks(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [ self._text_block("thinking..."), self._tool_use_block("memory", tool_id="tu_only"), ] - api._dispatch_tool_use(response) + api._append_tool_result(response, "result") tool_results = api.messages[1]["content"] assert len(tool_results) == 1 assert tool_results[0]["tool_use_id"] == "tu_only" - def test_dispatch_handles_multiple_tool_use_blocks(self): - tool1 = Mock() - tool1.to_dict.return_value = {"name": "memory"} - tool1.call.return_value = "memory result" - tool2 = Mock() - tool2.to_dict.return_value = {"name": "bash"} - tool2.call.return_value = "bash result" - api = AnthropicApi(system_prompt="test", native_tools=[tool1, tool2]) + def test_handles_multiple_tool_use_blocks(self): + api = AnthropicApi(system_prompt="test") response = Mock() response.content = [ self._tool_use_block("memory", tool_id="id_1"), self._tool_use_block("bash", tool_id="id_2"), ] - api._dispatch_tool_use(response) + api._append_tool_result(response, "combined result") results = api.messages[1]["content"] assert len(results) == 2 assert results[0]["tool_use_id"] == "id_1" - assert results[0]["content"] == "memory result" + assert results[0]["content"] == "combined result" assert results[1]["tool_use_id"] == "id_2" - assert results[1]["content"] == "bash result" + assert results[1]["content"] == "combined result" @pytest.mark.unit -class TestAnthropicApiAskWithToolUseLoop: - """Tests for ask() cycling through tool_use rounds before returning JSON.""" +class TestAnthropicApiAskWithToolUse: + """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results.""" @pytest.fixture(autouse=True) def _use_patch(self, patch_anthropic_config): pass @staticmethod - def _tool_use_response(tool_name, tool_id): + def _tool_use_response(tool_name, tool_id, input_data=None): block = Mock() block.type = "tool_use" block.name = tool_name block.id = tool_id - block.input = {} + block.input = input_data or {} block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} response = Mock() response.stop_reason = "tool_use" @@ -818,40 +717,61 @@ def _text_response(json_dict): response.content = [block] return response - def test_ask_dispatches_one_tool_use_round_then_returns(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "viewed /memories" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_returns_tool_use_as_ask_response(self): + dicts = [{"name": "memory", "type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test") + api._tool_dicts = dicts - tool_resp = self._tool_use_response("memory", "tu_1") - final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"}) + api.ai_client.messages.create = Mock(return_value=tool_resp) result = api.ask("do the task") - assert api.ai_client.messages.create.call_count == 2 - tool.call.assert_called_once() - assert result.command == "ls /" + assert result.task_done is False + assert '"native_tool_calls"' in result.command + parsed = json.loads(result.command) + assert parsed["native_tool_calls"][0]["name"] == "memory" + assert parsed["native_tool_calls"][0]["id"] == "tu_1" + assert api._pending_tool_response is tool_resp - def test_ask_dispatches_multiple_tool_use_rounds(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "ok" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_stores_pending_tool_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] - tool_resp1 = self._tool_use_response("memory", "tu_1") - tool_resp2 = self._tool_use_response("memory", "tu_2") - final_resp = self._text_response({"task_done": True, "command": "", "thoughts": "done"}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp1, tool_resp2, final_resp]) + tool_resp = self._tool_use_response("memory", "tu_1") + api.ai_client.messages.create = Mock(return_value=tool_resp) - result = api.ask("do the task") + api.ask("do it") - assert api.ai_client.messages.create.call_count == 3 - assert tool.call.call_count == 2 - assert result.task_done is True + assert api._pending_tool_response is tool_resp - def test_ask_without_tool_use_does_not_dispatch(self): + def test_ask_with_pending_tool_response_formats_tool_result(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + # First ask — returns tool_use + api.ask("do the task") + + # Second ask — sends tool result, formats as tool_result + result = api.ask("viewed /memories") + + assert result.command == "ls /" + assert api._pending_tool_response is None + + # Check messages contain the tool_result + tool_result_msgs = [ + m for m in api.messages + if m["role"] == "user" and isinstance(m["content"], list) + ] + assert len(tool_result_msgs) == 1 + assert tool_result_msgs[0]["content"][0]["type"] == "tool_result" + assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1" + + def test_ask_without_tool_use_works_normally(self): api = AnthropicApi(system_prompt="test") final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) @@ -861,23 +781,34 @@ def test_ask_without_tool_use_does_not_dispatch(self): assert api.ai_client.messages.create.call_count == 1 assert result.command == "pwd" + assert api._pending_tool_response is None - def test_ask_tool_use_messages_are_added_to_history(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory"} - tool.call.return_value = "result" - api = AnthropicApi(system_prompt="test", native_tools=[tool]) + def test_ask_extracts_thoughts_from_tool_use_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] - tool_resp = self._tool_use_response("memory", "tu_1") - final_resp = self._text_response({"task_done": False, "command": "echo hi", "thoughts": ""}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + # Build a tool_use response with a text block for thoughts + text_block = Mock() + text_block.type = "text" + text_block.text = "Let me check memory first" + text_block.model_dump.return_value = {"type": "text", "text": text_block.text} - api.ask("do it") + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "memory" + tool_block.id = "tu_1" + tool_block.input = {} + tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"} + + response = Mock() + response.stop_reason = "tool_use" + response.content = [text_block, tool_block] + + api.ai_client.messages.create = Mock(return_value=response) + + result = api.ask("do the task") - # Messages: user, assistant(tool_use), user(tool_result), assistant(final json) - roles = [m["role"] for m in api.messages] - assert roles.count("user") == 2 - assert roles.count("assistant") == 2 + assert result.thoughts == "Let me check memory first" if __name__ == "__main__": diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index db7069e..ab27a68 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -101,7 +101,19 @@ def test_returns_false_for_non_claude_models(self, tmp_path): @pytest.mark.unit class TestAnthropicMemoryToolIsInvoked: - def test_always_returns_false(self, tmp_path): + def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path): + tool = make_tool(tmp_path) + import json + cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]}) + assert tool.is_invoked(cmd) is True + + def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path): + tool = make_tool(tmp_path) + import json + cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]}) + assert tool.is_invoked(cmd) is False + + def test_returns_false_for_plain_commands(self, tmp_path): tool = make_tool(tmp_path) for cmd in ("memory view /memories", "memory clear", "anything", ""): assert tool.is_invoked(cmd) is False From c7d2657a448580e706c7e9d4b203e4ae19ccb056 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 06:15:00 +0000 Subject: [PATCH 09/20] enhance MemoryTool path validation and logging --- .../tools/tool_definitions/__init__.py | 2 + .../tools/tool_definitions/memory_tool.py | 17 ++++-- test/llm/test_llm.py | 30 +++++++++- .../test_anthropic_memory_tool.py | 55 +++++++++++++++++++ 4 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 src/microbots/tools/tool_definitions/__init__.py diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py new file mode 100644 index 0000000..88acf0d --- /dev/null +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -0,0 +1,2 @@ +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool \ No newline at end of file diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 310f880..b63f3ff 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -1,4 +1,5 @@ import logging +import os import shlex import shutil from pathlib import Path @@ -110,19 +111,22 @@ def is_model_supported(self, model_name: str) -> bool: def _resolve(self, path: str) -> Path: """Resolve a /memories/… path to an absolute host path.""" stripped = path.lstrip("/") + + # Reject any path containing '..' components before resolving + if ".." in Path(stripped).parts: + raise ValueError(f"Path traversal not allowed: {path!r}") + if stripped == "memories": rel = "" elif stripped.startswith("memories/"): rel = stripped[len("memories/"):] - elif stripped.startswith(("workdir", "home", "tmp", "var", "etc", "usr")): - raise ValueError( - f"Invalid memory path: {path!r}. Use paths under /memories/." - ) else: rel = stripped # treat as relative to memory_dir resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() - if not str(resolved).startswith(str(self._memory_dir.resolve())): + # Use trailing separator to prevent prefix confusion with sibling dirs + memory_root = str(self._memory_dir.resolve()) + if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep): raise ValueError(f"Path traversal not allowed: {path!r}") return resolved @@ -184,6 +188,7 @@ def _view(self, args: list) -> CmdReturn: elif args[i] == "--end" and i + 1 < len(args): end_line = int(args[i + 1]); i += 2 else: + logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i]) i += 1 resolved = self._resolve(path) @@ -232,6 +237,7 @@ def _str_replace(self, args: list) -> CmdReturn: elif args[i] == "--new" and i + 1 < len(args): new_text = args[i + 1]; i += 2 else: + logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i]) i += 1 if old_text is None or new_text is None: return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) @@ -259,6 +265,7 @@ def _insert(self, args: list) -> CmdReturn: elif args[i] == "--text" and i + 1 < len(args): text = args[i + 1]; i += 2 else: + logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i]) i += 1 if line_num is None or text is None: return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index bd1b82d..af5a7d2 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -742,4 +742,32 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert llm_response is None assert llm.retries == 1 assert len(llm.messages) == 1 - assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"] \ No newline at end of file + + +@pytest.mark.unit +class TestUpgradeToolsDefault: + """Tests for the default LLMInterface.upgrade_tools no-op implementation.""" + + @pytest.fixture + def llm(self): + return ConcreteLLM() + + def test_returns_same_list(self, llm): + """Default upgrade_tools returns the input list unchanged.""" + tools = ["tool_a", "tool_b"] + result = llm.upgrade_tools(tools) + assert result is tools + + def test_empty_list(self, llm): + """Default upgrade_tools handles an empty list.""" + tools = [] + result = llm.upgrade_tools(tools) + assert result == [] + + def test_preserves_tool_order_and_identity(self, llm): + """Default upgrade_tools does not reorder or copy elements.""" + sentinel = object() + tools = [sentinel, "other"] + result = llm.upgrade_tools(tools) + assert result[0] is sentinel + assert result[1] == "other" \ No newline at end of file diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py index ab27a68..81db12c 100644 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -364,3 +364,58 @@ def test_rename_raises_runtime_error_on_failure(self, tmp_path): ) with pytest.raises(RuntimeError): tool.rename(cmd) + + +# --------------------------------------------------------------------------- +# invoke — non-memory tool calls are skipped +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestAnthropicMemoryToolInvoke: + + def test_invoke_skips_non_memory_tool_calls(self, tmp_path): + """The ``if tc["name"] != "memory": continue`` branch is exercised + when native_tool_calls contains a non-memory tool.""" + import json + from unittest.mock import Mock + + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello") + + command = json.dumps({ + "native_tool_calls": [ + {"name": "bash", "id": "tu_1", "input": {"command": "ls"}}, + {"name": "memory", "id": "tu_2", "input": { + "command": "view", "path": "/memories/f.md", "view_range": None, + }}, + ] + }) + + result = tool.invoke(command, parent_bot=Mock()) + + assert result.return_code == 0 + # Only the memory call should produce output; bash should be skipped + assert "hello" in result.stdout + + def test_invoke_catches_exception_from_tool_call(self, tmp_path): + """The ``except Exception`` branch is exercised when tool.call() raises.""" + import json + from unittest.mock import Mock, patch + + tool = make_tool(tmp_path) + + command = json.dumps({ + "native_tool_calls": [ + {"name": "memory", "id": "tu_1", "input": { + "command": "view", "path": "/memories/nonexistent.md", "view_range": None, + }}, + ] + }) + + # Force call() to raise an exception + with patch.object(tool, "call", side_effect=RuntimeError("boom")): + result = tool.invoke(command, parent_bot=Mock()) + + assert result.return_code == 0 + assert "Error executing tool 'memory'" in result.stdout + assert "boom" in result.stdout From 3067b59e77f40197fc0c70ae101205a2dd13ec61 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 07:39:00 +0000 Subject: [PATCH 10/20] enhance path validation --- src/microbots/tools/tool_definitions/memory_tool.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index b63f3ff..8131b8b 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -101,9 +101,6 @@ def __post_init__(self): self._memory_dir = base self._memory_dir.mkdir(parents=True, exist_ok=True) - def is_model_supported(self, model_name: str) -> bool: - return True - # ---------------------------------------------------------------------- # # Path helpers # ---------------------------------------------------------------------- # @@ -116,6 +113,11 @@ def _resolve(self, path: str) -> Path: if ".." in Path(stripped).parts: raise ValueError(f"Path traversal not allowed: {path!r}") + if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"): + raise ValueError( + f"Invalid memory path: {path!r}. Use paths under /memories/." + ) + if stripped == "memories": rel = "" elif stripped.startswith("memories/"): From 1ec3251864619175fa1a274a52b5072382fb2be0 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 08:37:11 +0000 Subject: [PATCH 11/20] Add generic memory tool --- src/microbots/llm/anthropic_api.py | 27 +- .../tools/tool_definitions/__init__.py | 3 +- .../tool_definitions/anthropic_memory_tool.py | 213 --------- .../tools/tool_definitions/memory_tool.py | 4 +- test/bot/test_memory_tool_integration.py | 133 ++---- test/bot/test_upgrade_tools_for_provider.py | 117 ----- .../test_anthropic_memory_tool.py | 421 ------------------ 7 files changed, 46 insertions(+), 872 deletions(-) delete mode 100644 src/microbots/tools/tool_definitions/anthropic_memory_tool.py delete mode 100644 test/bot/test_upgrade_tools_for_provider.py delete mode 100644 test/tools/tool_definitions/test_anthropic_memory_tool.py diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index a403f9c..2da9bab 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -21,25 +21,6 @@ class AnthropicApi(LLMInterface): - def upgrade_tools(self, tools: list) -> list: - """Replace ``MemoryTool`` with ``AnthropicMemoryTool`` for native tool-use.""" - from microbots.tools.tool_definitions.memory_tool import MemoryTool - from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool - - upgraded = [] - for tool in tools: - if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): - logger.info( - "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider" - ) - upgraded.append(AnthropicMemoryTool( - memory_dir=tool.memory_dir, - usage_instructions=tool.usage_instructions_to_llm, - )) - else: - upgraded.append(tool) - return upgraded - def __init__( self, system_prompt: str, @@ -57,9 +38,8 @@ def __init__( max_retries : int Maximum number of retries for invalid LLM responses. additional_tools : Optional[List] - Tool objects passed from MicroBot. Any provider-agnostic tools - (e.g. ``MemoryTool``) are silently upgraded to their Anthropic- - native variants, and their API schemas are extracted. + Tool objects passed from MicroBot. Any tools exposing a native + Anthropic schema via ``to_dict()`` are forwarded to the API. """ self.ai_client = Anthropic( api_key=api_key, @@ -69,10 +49,9 @@ def __init__( self.system_prompt = system_prompt self.messages = [] - # Silently upgrade tools in-place and extract API schemas + # Preserve tool instances as provided and extract native Anthropic schemas. tools = additional_tools or [] upgraded = self.upgrade_tools(tools) - # Mutate the original list so the caller (MicroBot) sees upgraded tools if additional_tools is not None: additional_tools[:] = upgraded self._tool_dicts = [ diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py index 88acf0d..44e7592 100644 --- a/src/microbots/tools/tool_definitions/__init__.py +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -1,2 +1 @@ -from microbots.tools.tool_definitions.memory_tool import MemoryTool -from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool \ No newline at end of file +from microbots.tools.tool_definitions.memory_tool import MemoryTool \ No newline at end of file diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py deleted file mode 100644 index ef26182..0000000 --- a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py +++ /dev/null @@ -1,213 +0,0 @@ -""" -AnthropicMemoryTool — wraps Anthropic's memory tool. - -The memory tool lets the model persist information across conversations by -reading and writing files in a local memory directory. When the model invokes -the tool, it sends a command (view, create, str_replace, insert, delete, -rename) and the client executes it against a local filesystem directory. - -This implementation extends both: - - ``MemoryTool``: provides all file-operation logic (_resolve, _view, - _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies - the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.). - - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and - the ``to_dict()`` / ``call()`` interface required by AnthropicApi. - -The SDK command-handler overrides (view, create, str_replace, insert, delete, -rename) simply translate SDK command objects → arg lists and delegate to the -inherited MemoryTool private methods, converting the CmdReturn back to a -string as the SDK expects. - -The memory tool (type ``memory_20250818``) is available in the standard -Anthropic library and does not require a beta endpoint or header. Pass it -via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular -``client.messages.create(...)`` call. ``MicroBot`` auto-upgrades -``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and -passes the tool schema to ``AnthropicApi`` via ``tool_dicts``. - -Usage: - from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool - - memory = AnthropicMemoryTool() - bot = ReadingBot(..., additional_tools=[memory]) -""" - -from __future__ import annotations - -import json -from logging import getLogger -from pathlib import Path - -from typing_extensions import override - -from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool -from anthropic.types.beta import ( - BetaMemoryTool20250818CreateCommand, - BetaMemoryTool20250818DeleteCommand, - BetaMemoryTool20250818InsertCommand, - BetaMemoryTool20250818RenameCommand, - BetaMemoryTool20250818StrReplaceCommand, - BetaMemoryTool20250818ViewCommand, -) - -from microbots.environment.Environment import CmdReturn -from microbots.tools.tool_definitions.memory_tool import MemoryTool - -logger = getLogger(__name__) - -DEFAULT_MEMORY_INSTRUCTIONS = ( - "MEMORY PROTOCOL:\n" - "1. ALWAYS view your memory directory BEFORE doing anything else " - "using the `view` command of your `memory` tool to check for earlier progress.\n" - "2. As you make progress on the task, record status, progress, " - "and key findings in your memory using the memory tool.\n" - "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, " - "so you risk losing any progress that is not recorded in your memory directory.\n" - "4. Before completing a task, always save your final results and analysis to memory.\n" - "5. When editing your memory folder, always keep its content up-to-date, coherent " - "and organized. Rename or delete files that are no longer relevant. " - "Do not create new files unless necessary.\n\n" - "IMPORTANT: The memory tool ONLY works with paths under /memories/. " - "Do NOT use the memory tool to access the repository or workdir. " - "Use shell commands (ls, cat, etc.) for filesystem access." -) - - -class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool): - """ - Anthropic's built-in memory tool, backed by MemoryTool's file logic. - - Inherits file-operation logic from ``MemoryTool`` (plain Python class) and - the SDK's native dispatch interface from ``BetaAbstractMemoryTool``. - - The SDK command-handler overrides delegate to the inherited private methods - (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to - the ``args: list`` format that those methods expect, and converting the - returned ``CmdReturn`` to the string that the SDK API requires. - - Parameters - ---------- - memory_dir : str | Path | None - Root directory for memory files. Defaults to ``~/.microbots/memory``. - usage_instructions : str | None - Custom instructions appended to the system prompt for the LLM. - Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``. - """ - - def __init__( - self, - memory_dir: str | Path | None = None, - usage_instructions: str | None = None, - ) -> None: - MemoryTool.__init__( - self, - memory_dir=str(memory_dir) if memory_dir else None, - usage_instructions_to_llm=( - usage_instructions - if usage_instructions is not None - else DEFAULT_MEMORY_INSTRUCTIONS - ), - ) - _SDKMemoryTool.__init__(self) # type: ignore[call-arg] - - # ---------------------------------------------------------------------- # - # ToolAbstract overrides - # ---------------------------------------------------------------------- # - - def is_model_supported(self, model_name: str) -> bool: - """Only Anthropic (Claude) models support the native memory tool.""" - return "claude" in model_name.lower() - - def is_invoked(self, command: str) -> bool: - """Return True when the command is a serialized native_tool_calls JSON - containing a call to the ``memory`` tool.""" - try: - data = json.loads(command) - if "native_tool_calls" in data: - return any(tc["name"] == "memory" for tc in data["native_tool_calls"]) - except (json.JSONDecodeError, KeyError, TypeError): - pass - return False - - def invoke(self, command: str, parent_bot) -> CmdReturn: - """Execute all memory tool calls in the serialized native_tool_calls batch.""" - data = json.loads(command) - results = [] - for tc in data["native_tool_calls"]: - if tc["name"] != "memory": - continue - try: - result = self.call(tc["input"]) - logger.info( - "\U0001f9e0 Native tool 'memory' executed. Result (first 200 chars): %s", - str(result)[:200], - ) - results.append(str(result)) - except Exception as exc: - logger.error("Native tool 'memory' raised: %s", exc) - results.append(f"Error executing tool 'memory': {exc}") - combined = "\n".join(results) - return CmdReturn(stdout=combined, stderr="", return_code=0) - - def clear_all(self) -> None: - """Delete all memory files (useful for testing or resetting state).""" - self._clear() - logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir) - - # ---------------------------------------------------------------------- # - # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods - # ---------------------------------------------------------------------- # - - @override - def clear_all_memory(self) -> str: - self.clear_all() - return "All memory cleared" - - @override - def view(self, command: BetaMemoryTool20250818ViewCommand) -> str: - args = [command.path] - if command.view_range: - args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])] - result = self._view(args) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return result.stdout - - @override - def create(self, command: BetaMemoryTool20250818CreateCommand) -> str: - result = self._create([command.path, command.file_text]) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return f"File created successfully at {command.path}" - - @override - def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str: - result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str]) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return f"File {command.path} has been edited" - - @override - def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str: - result = self._insert([ - command.path, - "--line", str(command.insert_line), - "--text", command.insert_text, - ]) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return f"Text inserted at line {command.insert_line} in {command.path}" - - @override - def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str: - result = self._delete([command.path]) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return result.stdout - - @override - def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str: - result = self._rename([command.old_path, command.new_path]) - if result.return_code != 0: - raise RuntimeError(result.stderr) - return result.stdout diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 8131b8b..8c05ca8 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -67,8 +67,8 @@ @dataclass class MemoryTool(ExternalTool): """ - File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface - but dispatches through the text command loop (compatible with all providers). + File-backed memory tool that dispatches through the text command loop and + works consistently across providers. Subclass of ``ExternalTool`` — all command lists are empty so ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py index 25c717d..1ce9be6 100644 --- a/test/bot/test_memory_tool_integration.py +++ b/test/bot/test_memory_tool_integration.py @@ -1,7 +1,7 @@ -"""Tests for the Anthropic memory tool end-to-end flow. +"""Tests for memory tool end-to-end flow. Unit tests (mocked API): - Verify wiring — auto-upgrade, tool dispatch, and memory file operations + Verify wiring, tool dispatch, and memory file operations with a mocked Anthropic client. Fast, free, no API key needed. Integration tests (real API): @@ -28,42 +28,12 @@ from microbots import MicroBot, BotRunResult from microbots.llm.llm import llm_output_format_str from microbots.tools.tool_definitions.memory_tool import MemoryTool -from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- -def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""): - """Build a mock Anthropic API response with stop_reason='tool_use'.""" - blocks = [] - - if thinking_text: - text_block = Mock() - text_block.type = "text" - text_block.text = thinking_text - blocks.append(text_block) - - tool_block = Mock() - tool_block.type = "tool_use" - tool_block.name = tool_name - tool_block.id = tool_id - tool_block.input = tool_input - tool_block.model_dump = Mock(return_value={ - "type": "tool_use", - "id": tool_id, - "name": tool_name, - "input": tool_input, - }) - blocks.append(tool_block) - - resp = Mock() - resp.stop_reason = "tool_use" - resp.content = blocks - return resp - - def _make_end_turn_response(task_done, thoughts, command=""): """Build a mock Anthropic API response with stop_reason='end_turn'.""" payload = json.dumps({ @@ -88,7 +58,7 @@ def _make_end_turn_response(task_done, thoughts, command=""): @pytest.mark.unit class TestMemoryToolWiring: - """Unit tests — mocked Anthropic client, real tool dispatch and file ops.""" + """Unit tests for provider-agnostic MemoryTool dispatch with a mocked Anthropic client.""" @pytest.fixture() def memory_dir(self, tmp_path): @@ -101,7 +71,7 @@ def bot(self, memory_dir): """Create a MicroBot with Anthropic provider and a MemoryTool. The Anthropic client is mocked, but the rest of the stack is real: - auto-upgrade, tool dispatch, and memory file operations. + tool dispatch and memory file operations. """ tool = MemoryTool( memory_dir=str(memory_dir), @@ -131,34 +101,27 @@ def bot(self, memory_dir): # -- Upgrade verification ----------------------------------------------- - def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot): - """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool.""" - upgraded_tools = bot.additional_tools - memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)] - assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + def test_memory_tool_is_retained(self, bot): + """MemoryTool passed to MicroBot should remain provider-agnostic.""" + memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached" def test_tool_dicts_include_memory_schema(self, bot): - """The LLM should have received the memory tool schema.""" - assert len(bot.llm._tool_dicts) == 1 - assert bot.llm._tool_dicts[0]["type"] == "memory_20250818" + """Provider-agnostic MemoryTool should not register Anthropic-native schemas.""" + assert bot.llm._tool_dicts == [] - # -- Create file via tool_use ------------------------------------------- + # -- Create file via text command --------------------------------------- def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" # Sequence: - # 1. ask(task) → API returns tool_use (memory create) - # 2. ask(tool_result) → API returns end_turn (task_done=True) + # 1. ask(task) → API returns JSON command (memory create) + # 2. ask(command output) → API returns end_turn (task_done=True) self._mock_client.messages.create.side_effect = [ - _make_tool_use_response( - tool_name="memory", - tool_id="tool_001", - tool_input={ - "command": "create", - "path": "/memories/notes.md", - "file_text": "Hello from integration test", - }, - thinking_text="I'll save a note to memory.", + _make_end_turn_response( + task_done=False, + thoughts="I'll save a note to memory.", + command='memory create /memories/notes.md "Hello from integration test"', ), _make_end_turn_response( task_done=True, @@ -181,7 +144,7 @@ def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): assert created_file.exists(), f"Expected {created_file} to be created" assert created_file.read_text() == "Hello from integration test" - # -- View file via tool_use --------------------------------------------- + # -- View file via text command ----------------------------------------- def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): """LLM requests a memory view → MicroBot dispatches → file content returned.""" @@ -190,14 +153,10 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): (memory_dir / "existing.md").write_text("Previously saved content") self._mock_client.messages.create.side_effect = [ - _make_tool_use_response( - tool_name="memory", - tool_id="tool_002", - tool_input={ - "command": "view", - "path": "/memories/existing.md", - }, - thinking_text="Let me check my memory.", + _make_end_turn_response( + task_done=False, + thoughts="Let me check my memory.", + command="memory view /memories/existing.md", ), _make_end_turn_response( task_done=True, @@ -213,20 +172,17 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): assert result.status is True - # Verify the view result was passed back to the API as tool_result + # Verify the view result was passed back to the API as the next user message calls = self._mock_client.messages.create.call_args_list assert len(calls) == 2 - # The second call should have messages including the tool_result + # The second call should have messages including the file content second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", []) - tool_result_msgs = [ + user_messages = [ m for m in second_call_messages - if m.get("role") == "user" and isinstance(m.get("content"), list) - and any(c.get("type") == "tool_result" for c in m["content"]) + if m.get("role") == "user" and isinstance(m.get("content"), str) ] - assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call" - # The tool_result content should contain the file content - tool_result_content = tool_result_msgs[-1]["content"][0]["content"] - assert "Previously saved content" in tool_result_content + assert len(user_messages) >= 2, "Expected the command output to be sent as a user message" + assert "Previously saved content" in user_messages[-1]["content"] # -- Multiple tool calls in sequence ------------------------------------ @@ -234,25 +190,16 @@ def test_create_then_view_memory_file(self, bot, memory_dir): """LLM creates a file, then views it — both dispatched via MicroBot loop.""" self._mock_client.messages.create.side_effect = [ # Step 1: create file - _make_tool_use_response( - tool_name="memory", - tool_id="tool_003", - tool_input={ - "command": "create", - "path": "/memories/todo.md", - "file_text": "- Fix bug #42\n- Write tests", - }, - thinking_text="Creating a todo list.", + _make_end_turn_response( + task_done=False, + thoughts="Creating a todo list.", + command='memory create /memories/todo.md "- Fix bug #42\n- Write tests"', ), # Step 2: view file - _make_tool_use_response( - tool_name="memory", - tool_id="tool_004", - tool_input={ - "command": "view", - "path": "/memories/todo.md", - }, - thinking_text="Let me verify what I wrote.", + _make_end_turn_response( + task_done=False, + thoughts="Let me verify what I wrote.", + command="memory view /memories/todo.md", ), # Step 3: done _make_end_turn_response( @@ -358,10 +305,10 @@ def memory_bot(self, memory_dir): yield bot del bot - def test_memory_tool_auto_upgraded(self, memory_bot): - """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool.""" - memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)] - assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + def test_memory_tool_is_retained(self, memory_bot): + """MemoryTool should remain attached for Anthropic just like other providers.""" + memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, MemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached" def test_create_memory_file(self, memory_bot, memory_dir): """MicroBot should persist a debugging plan to memory. diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py deleted file mode 100644 index 459afd4..0000000 --- a/test/bot/test_upgrade_tools_for_provider.py +++ /dev/null @@ -1,117 +0,0 @@ -"""Unit tests for AnthropicApi.upgrade_tools() method. - -These tests verify that plain ``MemoryTool`` instances are automatically -replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``. -""" -import sys -import os -import logging -import pytest -from unittest.mock import patch, Mock - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) - -from microbots.llm.anthropic_api import AnthropicApi -from microbots.tools.tool_definitions.memory_tool import MemoryTool -from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: - return MemoryTool( - memory_dir=str(tmp_path / "memory"), - usage_instructions_to_llm=instructions, - ) - - -# --------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestUpgradeToolsForProvider: - - @pytest.fixture(autouse=True) - def _create_api(self): - with patch("microbots.llm.anthropic_api.Anthropic"): - self.api = AnthropicApi(system_prompt="test") - - # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool -------- - - def test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): - tool = _memory_tool(tmp_path) - - upgraded = self.api.upgrade_tools([tool]) - - assert len(upgraded) == 1 - assert isinstance(upgraded[0], AnthropicMemoryTool) - - def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): - mem_dir = str(tmp_path / "my_memory") - tool = MemoryTool(memory_dir=mem_dir) - - upgraded = self.api.upgrade_tools([tool]) - - assert isinstance(upgraded[0], AnthropicMemoryTool) - assert str(upgraded[0].memory_dir) == mem_dir - - def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): - custom_instructions = "custom memory instructions for test" - tool = _memory_tool(tmp_path, instructions=custom_instructions) - - upgraded = self.api.upgrade_tools([tool]) - - assert upgraded[0].usage_instructions_to_llm == custom_instructions - - def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): - existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) - - upgraded = self.api.upgrade_tools([existing]) - - assert len(upgraded) == 1 - assert upgraded[0] is existing - - def test_non_memory_tools_are_kept_unchanged(self, tmp_path): - other_tool = Mock() - other_tool.__class__ = Mock # not a MemoryTool subclass - - upgraded = self.api.upgrade_tools([other_tool]) - - assert len(upgraded) == 1 - assert upgraded[0] is other_tool - - def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): - plain_memory = _memory_tool(tmp_path) - already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) - other_tool = Mock(spec=[]) - - upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool]) - - assert len(upgraded) == 3 - # first: should have been upgraded - assert isinstance(upgraded[0], AnthropicMemoryTool) - assert upgraded[0] is not plain_memory - # second: already AnthropicMemoryTool, untouched - assert upgraded[1] is already_upgraded - # third: non-memory tool, untouched - assert upgraded[2] is other_tool - - def test_empty_tool_list_is_a_no_op(self): - upgraded = self.api.upgrade_tools([]) - - assert upgraded == [] - - def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): - tool1 = _memory_tool(tmp_path) - tmp_path2 = tmp_path / "sub" - tmp_path2.mkdir() - tool2 = _memory_tool(tmp_path2) - - with caplog.at_level(logging.INFO): - self.api.upgrade_tools([tool1, tool2]) - - upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] - assert len(upgrade_logs) == 2 diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py deleted file mode 100644 index 81db12c..0000000 --- a/test/tools/tool_definitions/test_anthropic_memory_tool.py +++ /dev/null @@ -1,421 +0,0 @@ -""" -Unit tests for AnthropicMemoryTool. - -Covers: - - __init__: memory_dir / usage_instructions forwarding and defaults - - is_model_supported - - is_invoked - - clear_all / clear_all_memory (SDK override) - - SDK overrides: view, create, str_replace, insert, delete, rename - (happy-path + RuntimeError on failure) -""" -import logging -import pytest - -from anthropic.types.beta import ( - BetaMemoryTool20250818CreateCommand, - BetaMemoryTool20250818DeleteCommand, - BetaMemoryTool20250818InsertCommand, - BetaMemoryTool20250818RenameCommand, - BetaMemoryTool20250818StrReplaceCommand, - BetaMemoryTool20250818ViewCommand, -) - -from microbots.tools.tool_definitions.anthropic_memory_tool import ( - DEFAULT_MEMORY_INSTRUCTIONS, - AnthropicMemoryTool, -) -from microbots.tools.tool_definitions.memory_tool import MemoryTool - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def make_tool(tmp_path) -> AnthropicMemoryTool: - return AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) - - -# --------------------------------------------------------------------------- -# __init__ -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolInit: - - def test_is_subclass_of_memory_tool(self, tmp_path): - assert isinstance(make_tool(tmp_path), MemoryTool) - - def test_memory_dir_is_forwarded(self, tmp_path): - mem_dir = str(tmp_path / "my_memory") - tool = AnthropicMemoryTool(memory_dir=mem_dir) - assert str(tool._memory_dir) == mem_dir - - def test_memory_dir_is_created_on_init(self, tmp_path): - mem_dir = tmp_path / "new_memory" - assert not mem_dir.exists() - AnthropicMemoryTool(memory_dir=str(mem_dir)) - assert mem_dir.exists() - - def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): - from pathlib import Path - monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) - tool = AnthropicMemoryTool() - assert tool._memory_dir == tmp_path / ".microbots" / "memory" - - def test_custom_usage_instructions_are_stored(self, tmp_path): - custom = "custom instructions" - tool = AnthropicMemoryTool( - memory_dir=str(tmp_path / "memory"), - usage_instructions=custom, - ) - assert tool.usage_instructions_to_llm == custom - - def test_default_usage_instructions_are_applied_when_none(self, tmp_path): - tool = make_tool(tmp_path) - assert tool.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS - - -# --------------------------------------------------------------------------- -# is_model_supported -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolIsModelSupported: - - def test_returns_true_for_claude_models(self, tmp_path): - tool = make_tool(tmp_path) - for model in ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4"): - assert tool.is_model_supported(model) is True - - def test_returns_false_for_non_claude_models(self, tmp_path): - tool = make_tool(tmp_path) - for model in ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", ""): - assert tool.is_model_supported(model) is False - - -# --------------------------------------------------------------------------- -# is_invoked -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolIsInvoked: - - def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path): - tool = make_tool(tmp_path) - import json - cmd = json.dumps({"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]}) - assert tool.is_invoked(cmd) is True - - def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path): - tool = make_tool(tmp_path) - import json - cmd = json.dumps({"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]}) - assert tool.is_invoked(cmd) is False - - def test_returns_false_for_plain_commands(self, tmp_path): - tool = make_tool(tmp_path) - for cmd in ("memory view /memories", "memory clear", "anything", ""): - assert tool.is_invoked(cmd) is False - - -# --------------------------------------------------------------------------- -# clear_all / clear_all_memory -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolClearAll: - - def test_clear_all_removes_all_files(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "a.md").write_text("a") - (tool._memory_dir / "b.md").write_text("b") - - tool.clear_all() - - assert list(tool._memory_dir.iterdir()) == [] - - def test_clear_all_leaves_memory_dir_intact(self, tmp_path): - tool = make_tool(tmp_path) - tool.clear_all() - assert tool._memory_dir.exists() - - def test_clear_all_logs_info(self, tmp_path, caplog): - tool = make_tool(tmp_path) - with caplog.at_level(logging.INFO): - tool.clear_all() - assert "AnthropicMemoryTool" in caplog.text - assert "cleared" in caplog.text - - def test_clear_all_memory_returns_string(self, tmp_path): - tool = make_tool(tmp_path) - result = tool.clear_all_memory() - assert result == "All memory cleared" - - def test_clear_all_memory_removes_files(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("data") - - tool.clear_all_memory() - - assert list(tool._memory_dir.iterdir()) == [] - - -# --------------------------------------------------------------------------- -# view (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolView: - - def test_view_returns_file_contents(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "notes.md").write_text("hello\nworld\n") - - cmd = BetaMemoryTool20250818ViewCommand( - command="view", path="/memories/notes.md", view_range=None - ) - result = tool.view(cmd) - - assert "hello" in result - assert "world" in result - - def test_view_with_view_range(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") - - cmd = BetaMemoryTool20250818ViewCommand( - command="view", path="/memories/f.md", view_range=[2, 4] - ) - result = tool.view(cmd) - - assert "b" in result - assert "d" in result - assert "a" not in result - assert "e" not in result - - def test_view_raises_runtime_error_on_failure(self, tmp_path): - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818ViewCommand( - command="view", path="/memories/nonexistent.md", view_range=None - ) - with pytest.raises(RuntimeError): - tool.view(cmd) - - -# --------------------------------------------------------------------------- -# create (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolCreate: - - def test_create_writes_file(self, tmp_path): - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818CreateCommand( - command="create", path="/memories/new.md", file_text="hello world" - ) - result = tool.create(cmd) - - assert "new.md" in result - assert (tool._memory_dir / "new.md").read_text() == "hello world" - - def test_create_raises_runtime_error_on_failure(self, tmp_path): - """Ensures the `raise RuntimeError(result.stderr)` branch is exercised by - mocking _create to return a non-zero CmdReturn.""" - from unittest.mock import patch - from microbots.environment.Environment import CmdReturn - - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818CreateCommand( - command="create", path="/memories/new.md", file_text="x" - ) - with patch.object( - tool, "_create", return_value=CmdReturn(stdout="", stderr="disk full", return_code=1) - ): - with pytest.raises(RuntimeError, match="disk full"): - tool.create(cmd) - - -# --------------------------------------------------------------------------- -# str_replace (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolStrReplace: - - def test_str_replace_edits_file(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("hello world") - - cmd = BetaMemoryTool20250818StrReplaceCommand( - command="str_replace", - path="/memories/f.md", - old_str="hello", - new_str="goodbye", - ) - result = tool.str_replace(cmd) - - assert "f.md" in result - assert (tool._memory_dir / "f.md").read_text() == "goodbye world" - - def test_str_replace_raises_runtime_error_on_failure(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("hello world") - cmd = BetaMemoryTool20250818StrReplaceCommand( - command="str_replace", - path="/memories/f.md", - old_str="not present", - new_str="x", - ) - with pytest.raises(RuntimeError): - tool.str_replace(cmd) - - -# --------------------------------------------------------------------------- -# insert (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolInsert: - - def test_insert_prepends_line(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("line1\nline2\n") - - cmd = BetaMemoryTool20250818InsertCommand( - command="insert", - path="/memories/f.md", - insert_line=0, - insert_text="prepended", - ) - result = tool.insert(cmd) - - assert "0" in result or "prepended" in result or "f.md" in result - lines = (tool._memory_dir / "f.md").read_text().splitlines() - assert lines[0] == "prepended" - - def test_insert_raises_runtime_error_on_failure(self, tmp_path): - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818InsertCommand( - command="insert", - path="/memories/missing.md", - insert_line=0, - insert_text="x", - ) - with pytest.raises(RuntimeError): - tool.insert(cmd) - - -# --------------------------------------------------------------------------- -# delete (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolDelete: - - def test_delete_removes_file(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("data") - - cmd = BetaMemoryTool20250818DeleteCommand( - command="delete", path="/memories/f.md" - ) - tool.delete(cmd) - - assert not (tool._memory_dir / "f.md").exists() - - def test_delete_raises_runtime_error_on_failure(self, tmp_path): - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818DeleteCommand( - command="delete", path="/memories/nonexistent.md" - ) - with pytest.raises(RuntimeError): - tool.delete(cmd) - - -# --------------------------------------------------------------------------- -# rename (SDK override) -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolRename: - - def test_rename_moves_file(self, tmp_path): - tool = make_tool(tmp_path) - (tool._memory_dir / "old.md").write_text("content") - - cmd = BetaMemoryTool20250818RenameCommand( - command="rename", - old_path="/memories/old.md", - new_path="/memories/new.md", - ) - tool.rename(cmd) - - assert not (tool._memory_dir / "old.md").exists() - assert (tool._memory_dir / "new.md").read_text() == "content" - - def test_rename_raises_runtime_error_on_failure(self, tmp_path): - tool = make_tool(tmp_path) - cmd = BetaMemoryTool20250818RenameCommand( - command="rename", - old_path="/memories/missing.md", - new_path="/memories/new.md", - ) - with pytest.raises(RuntimeError): - tool.rename(cmd) - - -# --------------------------------------------------------------------------- -# invoke — non-memory tool calls are skipped -# --------------------------------------------------------------------------- - -@pytest.mark.unit -class TestAnthropicMemoryToolInvoke: - - def test_invoke_skips_non_memory_tool_calls(self, tmp_path): - """The ``if tc["name"] != "memory": continue`` branch is exercised - when native_tool_calls contains a non-memory tool.""" - import json - from unittest.mock import Mock - - tool = make_tool(tmp_path) - (tool._memory_dir / "f.md").write_text("hello") - - command = json.dumps({ - "native_tool_calls": [ - {"name": "bash", "id": "tu_1", "input": {"command": "ls"}}, - {"name": "memory", "id": "tu_2", "input": { - "command": "view", "path": "/memories/f.md", "view_range": None, - }}, - ] - }) - - result = tool.invoke(command, parent_bot=Mock()) - - assert result.return_code == 0 - # Only the memory call should produce output; bash should be skipped - assert "hello" in result.stdout - - def test_invoke_catches_exception_from_tool_call(self, tmp_path): - """The ``except Exception`` branch is exercised when tool.call() raises.""" - import json - from unittest.mock import Mock, patch - - tool = make_tool(tmp_path) - - command = json.dumps({ - "native_tool_calls": [ - {"name": "memory", "id": "tu_1", "input": { - "command": "view", "path": "/memories/nonexistent.md", "view_range": None, - }}, - ] - }) - - # Force call() to raise an exception - with patch.object(tool, "call", side_effect=RuntimeError("boom")): - result = tool.invoke(command, parent_bot=Mock()) - - assert result.return_code == 0 - assert "Error executing tool 'memory'" in result.stdout - assert "boom" in result.stdout From 364be952c1538d7d180b2de5fed7efd3df2534cd Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Wed, 11 Mar 2026 12:57:29 +0000 Subject: [PATCH 12/20] Add test in MemoryTool --- test/tools/tool_definitions/test_memory_tool.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 089ca52..584aa54 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -92,6 +92,19 @@ def test_resolve_rejects_path_traversal(self, tmp_path): with pytest.raises(ValueError, match="Path traversal"): tool._resolve("/memories/../../etc/passwd") + def test_resolve_rejects_symlink_escaping_memory_dir(self, tmp_path): + """A symlink inside memory_dir that resolves outside must be rejected + by the startswith guard (not the '..' check).""" + tool = make_tool(tmp_path) + outside = tmp_path / "outside" + outside.mkdir() + (outside / "secret.txt").write_text("sensitive") + # Create a symlink inside the memory dir pointing outside + link = tool._memory_dir / "escape" + link.symlink_to(outside) + with pytest.raises(ValueError, match="Path traversal"): + tool._resolve("/memories/escape/secret.txt") + def test_resolve_rejects_non_memory_paths(self, tmp_path): tool = make_tool(tmp_path) for bad in ("/workdir/file", "/home/user/file", "/tmp/file"): From 61704c9977a8d65fc5b699cc77256c10a2b31de9 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 12 Mar 2026 05:25:45 +0000 Subject: [PATCH 13/20] Refactor AnthropicApi and LLMInterface: remove unused parameters and methods --- src/microbots/MicroBot.py | 4 +- src/microbots/llm/anthropic_api.py | 129 ++--------- src/microbots/llm/llm.py | 9 - src/microbots/tools/tool.py | 4 +- test/bot/test_memory_tool_integration.py | 4 - test/llm/test_anthropic_api.py | 276 ----------------------- test/llm/test_llm.py | 31 +-- 7 files changed, 17 insertions(+), 440 deletions(-) diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index f2708b1..3ace5a4 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -335,9 +335,7 @@ def _create_llm(self): ) elif self.model_provider == ModelProvider.ANTHROPIC: self.llm = AnthropicApi( - system_prompt=system_prompt_with_tools, - deployment_name=self.deployment_name, - additional_tools=self.additional_tools, + system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index 2da9bab..f40118a 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -1,9 +1,7 @@ import json import os -import re from dataclasses import asdict from logging import getLogger -from typing import List, Optional from dotenv import load_dotenv from anthropic import Anthropic @@ -18,29 +16,9 @@ api_key = os.getenv("ANTHROPIC_API_KEY") - class AnthropicApi(LLMInterface): - def __init__( - self, - system_prompt: str, - deployment_name: str = deployment_name, - max_retries: int = 3, - additional_tools: Optional[List] = None, - ): - """ - Parameters - ---------- - system_prompt : str - System prompt for the LLM. - deployment_name : str - The Anthropic model deployment name. - max_retries : int - Maximum number of retries for invalid LLM responses. - additional_tools : Optional[List] - Tool objects passed from MicroBot. Any tools exposing a native - Anthropic schema via ``to_dict()`` are forwarded to the API. - """ + def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3): self.ai_client = Anthropic( api_key=api_key, base_url=endpoint @@ -49,111 +27,30 @@ def __init__( self.system_prompt = system_prompt self.messages = [] - # Preserve tool instances as provided and extract native Anthropic schemas. - tools = additional_tools or [] - upgraded = self.upgrade_tools(tools) - if additional_tools is not None: - additional_tools[:] = upgraded - self._tool_dicts = [ - t.to_dict() for t in upgraded - if callable(getattr(t, "to_dict", None)) - ] - self._pending_tool_response = None - # Set these values here. This logic will be handled in the parent class. self.max_retries = max_retries self.retries = 0 - # ---------------------------------------------------------------------- # - # Internal helpers - # ---------------------------------------------------------------------- # - - def _call_api(self) -> object: - """Call the Anthropic messages API, including tool definitions when present.""" - kwargs = dict( - model=self.deployment_name, - system=self.system_prompt, - messages=self.messages, - max_tokens=4096, - ) - - if self._tool_dicts: - kwargs["tools"] = self._tool_dicts - - return self.ai_client.messages.create(**kwargs) - - def _append_tool_result(self, response, result_text: str) -> None: - """Append the assistant tool_use turn and the corresponding tool_result user turn. - - Called when the caller provides the tool execution result via - the next ``ask()`` call. - """ - assistant_content = [block.model_dump() for block in response.content] - self.messages.append({"role": "assistant", "content": assistant_content}) - - tool_results = [] - for block in response.content: - if block.type != "tool_use": - continue - tool_results.append({ - "type": "tool_result", - "tool_use_id": block.id, - "content": str(result_text), - }) - - self.messages.append({"role": "user", "content": tool_results}) - - # ---------------------------------------------------------------------- # - # Public interface - # ---------------------------------------------------------------------- # - - def ask(self, message: str) -> LLMAskResponse: + def ask(self, message) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. - if self._pending_tool_response: - # Previous response was tool_use — format this message as tool results. - self._append_tool_result(self._pending_tool_response, message) - self._pending_tool_response = None - else: - self.messages.append({"role": "user", "content": message}) + self.messages.append({"role": "user", "content": message}) valid = False while not valid: - response = self._call_api() - - if response.stop_reason == "tool_use": - # Return tool call info as an LLMAskResponse so the - # caller (MicroBot.run) can dispatch the tool. - self._pending_tool_response = response - - thoughts = "" - for block in response.content: - if block.type == "text": - thoughts = block.text - break - - tool_calls = [] - for block in response.content: - if block.type == "tool_use": - tool_calls.append({ - "name": block.name, - "id": block.id, - "input": block.input, - }) - - command = json.dumps({"native_tool_calls": tool_calls}) - return LLMAskResponse(task_done=False, thoughts=thoughts, command=command) - - # Extract text content from the final response - response_text = "" - for block in response.content: - if block.type == "text": - response_text = block.text - break - + response = self.ai_client.messages.create( + model=self.deployment_name, + system=self.system_prompt, + messages=self.messages, + max_tokens=4096, + ) + + # Extract text content from response + response_text = response.content[0].text if response.content else "" logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500]) # Try to extract JSON if wrapped in markdown code blocks + import re json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL) if json_match: response_text = json_match.group(1) diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py index e7e5c22..2800790 100644 --- a/src/microbots/llm/llm.py +++ b/src/microbots/llm/llm.py @@ -29,15 +29,6 @@ def ask(self, message: str) -> LLMAskResponse: def clear_history(self) -> bool: pass - def upgrade_tools(self, tools: list) -> list: - """Upgrade tools for the specific LLM provider. - - The default implementation is a no-op. Subclasses (e.g. - ``AnthropicApi``) override this to swap provider-agnostic tools - with their native equivalents. - """ - return tools - def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]: if self.retries >= self.max_retries: diff --git a/src/microbots/tools/tool.py b/src/microbots/tools/tool.py index 6b8351f..39eaa4e 100644 --- a/src/microbots/tools/tool.py +++ b/src/microbots/tools/tool.py @@ -40,8 +40,8 @@ class ToolAbstract(ABC): Tool hierarchy: ToolAbstract (ABC) ├── Tool (ToolAbstract) — Docker sandbox tools (install_commands, env_variables, etc.) - └── ExternalTool (ToolAbstract) — LLM-native tools (get_tool_definition, execute) - └── AnthropicMemoryTool + └── ExternalTool (ToolAbstract) — Host-side tools (get_tool_definition, execute) + └── MemoryTool """ # TODO: Handle different instructions based on the platform (linux flavours, windows, mac) # TODO: Add versioning to tools diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py index 1ce9be6..3debe6c 100644 --- a/test/bot/test_memory_tool_integration.py +++ b/test/bot/test_memory_tool_integration.py @@ -106,10 +106,6 @@ def test_memory_tool_is_retained(self, bot): memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)] assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached" - def test_tool_dicts_include_memory_schema(self, bot): - """Provider-agnostic MemoryTool should not register Anthropic-native schemas.""" - assert bot.llm._tool_dicts == [] - # -- Create file via text command --------------------------------------- def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 2c11966..4db2d96 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -535,282 +535,6 @@ def test_anthropic_api_clear_history_integration(self): assert len(api.messages) == 0 # Anthropic doesn't store system in messages -# ============================================================================ -# Tests for native_tools support (new changes) -# ============================================================================ - -@pytest.mark.unit -class TestAnthropicApiToolDictsInit: - """Tests for __init__ tool upgrade and tool_dicts extraction.""" - - @pytest.fixture(autouse=True) - def _use_patch(self, patch_anthropic_config): - pass - - def test_init_without_additional_tools_has_empty_tool_dicts(self): - api = AnthropicApi(system_prompt="test") - - assert api._tool_dicts == [] - - def test_init_with_none_additional_tools_has_empty_tool_dicts(self): - api = AnthropicApi(system_prompt="test", additional_tools=None) - - assert api._tool_dicts == [] - - def test_init_with_tool_having_to_dict_extracts_dicts(self): - tool = Mock() - tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} - # Ensure it's not a MemoryTool so upgrade_tools won't touch it - tool.__class__ = Mock - - api = AnthropicApi(system_prompt="test", additional_tools=[tool]) - - assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}] - - def test_init_sets_pending_tool_response_to_none(self): - api = AnthropicApi(system_prompt="test") - - assert api._pending_tool_response is None - - -@pytest.mark.unit -class TestAnthropicApiCallApiWithTools: - """Tests for _call_api including/excluding the tools kwarg.""" - - @pytest.fixture(autouse=True) - def _use_patch(self, patch_anthropic_config): - pass - - def test_call_api_without_tools_omits_tools_kwarg(self): - api = AnthropicApi(system_prompt="test", deployment_name="claude-3") - api.messages = [{"role": "user", "content": "hello"}] - api.ai_client.messages.create = Mock(return_value=Mock()) - - api._call_api() - - call_kwargs = api.ai_client.messages.create.call_args[1] - assert "tools" not in call_kwargs - - def test_call_api_with_tool_dicts_passes_them(self): - dicts = [{"name": "memory", "type": "memory_20250818"}] - api = AnthropicApi(system_prompt="test", deployment_name="claude-3") - api._tool_dicts = dicts - api.messages = [{"role": "user", "content": "hello"}] - api.ai_client.messages.create = Mock(return_value=Mock()) - - api._call_api() - - call_kwargs = api.ai_client.messages.create.call_args[1] - assert "tools" in call_kwargs - assert call_kwargs["tools"] == dicts - - -@pytest.mark.unit -class TestAnthropicApiAppendToolResult: - """Tests for _append_tool_result.""" - - @pytest.fixture(autouse=True) - def _use_patch(self, patch_anthropic_config): - pass - - @staticmethod - def _tool_use_block(name, tool_id="tu_001", input_data=None): - block = Mock() - block.type = "tool_use" - block.name = name - block.id = tool_id - block.input = input_data or {} - block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name} - return block - - @staticmethod - def _text_block(text="hello"): - block = Mock() - block.type = "text" - block.text = text - block.model_dump.return_value = {"type": "text", "text": text} - return block - - def test_appends_assistant_message_first(self): - api = AnthropicApi(system_prompt="test") - - response = Mock() - response.content = [self._tool_use_block("memory")] - api._append_tool_result(response, "ok") - - assert api.messages[0]["role"] == "assistant" - - def test_appends_tool_result_user_message(self): - api = AnthropicApi(system_prompt="test") - - response = Mock() - response.content = [self._tool_use_block("memory", tool_id="tu_abc")] - api._append_tool_result(response, "file listing") - - user_msg = api.messages[1] - assert user_msg["role"] == "user" - assert user_msg["content"][0]["type"] == "tool_result" - assert user_msg["content"][0]["tool_use_id"] == "tu_abc" - assert user_msg["content"][0]["content"] == "file listing" - - def test_skips_non_tool_use_content_blocks(self): - api = AnthropicApi(system_prompt="test") - - response = Mock() - response.content = [ - self._text_block("thinking..."), - self._tool_use_block("memory", tool_id="tu_only"), - ] - api._append_tool_result(response, "result") - - tool_results = api.messages[1]["content"] - assert len(tool_results) == 1 - assert tool_results[0]["tool_use_id"] == "tu_only" - - def test_handles_multiple_tool_use_blocks(self): - api = AnthropicApi(system_prompt="test") - - response = Mock() - response.content = [ - self._tool_use_block("memory", tool_id="id_1"), - self._tool_use_block("bash", tool_id="id_2"), - ] - api._append_tool_result(response, "combined result") - - results = api.messages[1]["content"] - assert len(results) == 2 - assert results[0]["tool_use_id"] == "id_1" - assert results[0]["content"] == "combined result" - assert results[1]["tool_use_id"] == "id_2" - assert results[1]["content"] == "combined result" - - -@pytest.mark.unit -class TestAnthropicApiAskWithToolUse: - """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results.""" - - @pytest.fixture(autouse=True) - def _use_patch(self, patch_anthropic_config): - pass - - @staticmethod - def _tool_use_response(tool_name, tool_id, input_data=None): - block = Mock() - block.type = "tool_use" - block.name = tool_name - block.id = tool_id - block.input = input_data or {} - block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} - response = Mock() - response.stop_reason = "tool_use" - response.content = [block] - return response - - @staticmethod - def _text_response(json_dict): - block = Mock() - block.type = "text" - block.text = json.dumps(json_dict) - block.model_dump.return_value = {"type": "text", "text": block.text} - response = Mock() - response.stop_reason = "end_turn" - response.content = [block] - return response - - def test_ask_returns_tool_use_as_ask_response(self): - dicts = [{"name": "memory", "type": "memory_20250818"}] - api = AnthropicApi(system_prompt="test") - api._tool_dicts = dicts - - tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": "/memories"}) - api.ai_client.messages.create = Mock(return_value=tool_resp) - - result = api.ask("do the task") - - assert result.task_done is False - assert '"native_tool_calls"' in result.command - parsed = json.loads(result.command) - assert parsed["native_tool_calls"][0]["name"] == "memory" - assert parsed["native_tool_calls"][0]["id"] == "tu_1" - assert api._pending_tool_response is tool_resp - - def test_ask_stores_pending_tool_response(self): - api = AnthropicApi(system_prompt="test") - api._tool_dicts = [{"name": "memory"}] - - tool_resp = self._tool_use_response("memory", "tu_1") - api.ai_client.messages.create = Mock(return_value=tool_resp) - - api.ask("do it") - - assert api._pending_tool_response is tool_resp - - def test_ask_with_pending_tool_response_formats_tool_result(self): - api = AnthropicApi(system_prompt="test") - api._tool_dicts = [{"name": "memory"}] - - tool_resp = self._tool_use_response("memory", "tu_1") - final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) - api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) - - # First ask — returns tool_use - api.ask("do the task") - - # Second ask — sends tool result, formats as tool_result - result = api.ask("viewed /memories") - - assert result.command == "ls /" - assert api._pending_tool_response is None - - # Check messages contain the tool_result - tool_result_msgs = [ - m for m in api.messages - if m["role"] == "user" and isinstance(m["content"], list) - ] - assert len(tool_result_msgs) == 1 - assert tool_result_msgs[0]["content"][0]["type"] == "tool_result" - assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1" - - def test_ask_without_tool_use_works_normally(self): - api = AnthropicApi(system_prompt="test") - - final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) - api.ai_client.messages.create = Mock(return_value=final_resp) - - result = api.ask("where am I?") - - assert api.ai_client.messages.create.call_count == 1 - assert result.command == "pwd" - assert api._pending_tool_response is None - - def test_ask_extracts_thoughts_from_tool_use_response(self): - api = AnthropicApi(system_prompt="test") - api._tool_dicts = [{"name": "memory"}] - - # Build a tool_use response with a text block for thoughts - text_block = Mock() - text_block.type = "text" - text_block.text = "Let me check memory first" - text_block.model_dump.return_value = {"type": "text", "text": text_block.text} - - tool_block = Mock() - tool_block.type = "tool_use" - tool_block.name = "memory" - tool_block.id = "tu_1" - tool_block.input = {} - tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"} - - response = Mock() - response.stop_reason = "tool_use" - response.content = [text_block, tool_block] - - api.ai_client.messages.create = Mock(return_value=response) - - result = api.ask("do the task") - - assert result.thoughts == "Let me check memory first" - - if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index af5a7d2..aa64fe7 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -741,33 +741,4 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert valid is False assert llm_response is None assert llm.retries == 1 - assert len(llm.messages) == 1 - - -@pytest.mark.unit -class TestUpgradeToolsDefault: - """Tests for the default LLMInterface.upgrade_tools no-op implementation.""" - - @pytest.fixture - def llm(self): - return ConcreteLLM() - - def test_returns_same_list(self, llm): - """Default upgrade_tools returns the input list unchanged.""" - tools = ["tool_a", "tool_b"] - result = llm.upgrade_tools(tools) - assert result is tools - - def test_empty_list(self, llm): - """Default upgrade_tools handles an empty list.""" - tools = [] - result = llm.upgrade_tools(tools) - assert result == [] - - def test_preserves_tool_order_and_identity(self, llm): - """Default upgrade_tools does not reorder or copy elements.""" - sentinel = object() - tools = [sentinel, "other"] - result = llm.upgrade_tools(tools) - assert result[0] is sentinel - assert result[1] == "other" \ No newline at end of file + assert len(llm.messages) == 1 \ No newline at end of file From 7d6c782a832fca7561ed4b304fa852c7049693f3 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 12 Mar 2026 05:51:17 +0000 Subject: [PATCH 14/20] Update comment --- src/microbots/tools/tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/microbots/tools/tool.py b/src/microbots/tools/tool.py index 39eaa4e..6b8351f 100644 --- a/src/microbots/tools/tool.py +++ b/src/microbots/tools/tool.py @@ -40,8 +40,8 @@ class ToolAbstract(ABC): Tool hierarchy: ToolAbstract (ABC) ├── Tool (ToolAbstract) — Docker sandbox tools (install_commands, env_variables, etc.) - └── ExternalTool (ToolAbstract) — Host-side tools (get_tool_definition, execute) - └── MemoryTool + └── ExternalTool (ToolAbstract) — LLM-native tools (get_tool_definition, execute) + └── AnthropicMemoryTool """ # TODO: Handle different instructions based on the platform (linux flavours, windows, mac) # TODO: Add versioning to tools From 2de384416d99d215da74c59c1c62b38ea008f81c Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 12 Mar 2026 08:34:26 +0000 Subject: [PATCH 15/20] Refactor test files: remove unnecessary imports and clean up code --- pytest.ini | 1 - .../tools/tool_definitions/__init__.py | 2 +- .../tools/tool_definitions/memory_tool.py | 1 + test/bot/test_memory_tool_integration.py | 5 ----- test/llm/test_anthropic_api.py | 22 ------------------- test/llm/test_llm.py | 3 ++- .../tool_definitions/test_memory_tool.py | 4 ---- 7 files changed, 4 insertions(+), 34 deletions(-) diff --git a/pytest.ini b/pytest.ini index 80758ec..d0e4f1f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,6 +11,5 @@ addopts = markers = unit: Unit tests integration: Integration tests - anthropic_integration: Integration tests requiring a real Anthropic API key slow: Slow tests docker: marks tests that require a running Docker daemon and pull container images diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py index 44e7592..1aa24ce 100644 --- a/src/microbots/tools/tool_definitions/__init__.py +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -1 +1 @@ -from microbots.tools.tool_definitions.memory_tool import MemoryTool \ No newline at end of file +from microbots.tools.tool_definitions.memory_tool import MemoryTool diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 8c05ca8..c584f30 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -61,6 +61,7 @@ - Paths must start with /memories/. - memory create overwrites if the file already exists. - memory str_replace requires the old text to appear exactly once. +- In memory view, use --end -1 to read through the end of the file. """ diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py index 3debe6c..b20c32b 100644 --- a/test/bot/test_memory_tool_integration.py +++ b/test/bot/test_memory_tool_integration.py @@ -12,7 +12,6 @@ import json import os -import sys from pathlib import Path from unittest.mock import Mock, patch @@ -21,10 +20,6 @@ load_dotenv() -sys.path.insert( - 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) -) - from microbots import MicroBot, BotRunResult from microbots.llm.llm import llm_output_format_str from microbots.tools.tool_definitions.memory_tool import MemoryTool diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 4db2d96..674294c 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -98,9 +98,7 @@ def test_ask_successful_response(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo 'hello'", @@ -135,9 +133,7 @@ def test_ask_with_task_done_true(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": True, "command": "", @@ -161,16 +157,12 @@ def test_ask_with_retry_on_invalid_response(self): # Mock the Anthropic client to return invalid then valid response mock_invalid_response = Mock() - mock_invalid_response.stop_reason = "end_turn" mock_invalid_content = Mock() - mock_invalid_content.type = "text" mock_invalid_content.text = "invalid json" mock_invalid_response.content = [mock_invalid_content] mock_valid_response = Mock() - mock_valid_response.stop_reason = "end_turn" mock_valid_content = Mock() - mock_valid_content.type = "text" mock_valid_content.text = json.dumps({ "task_done": False, "command": "ls -la", @@ -201,9 +193,7 @@ def test_ask_appends_user_message(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -228,9 +218,7 @@ def test_ask_appends_assistant_response_as_json(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo test", @@ -259,9 +247,7 @@ def test_ask_uses_asdict_for_response(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" response_dict = { "task_done": True, "command": "", @@ -291,9 +277,7 @@ def test_ask_resets_retries_to_zero(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "ls", @@ -315,9 +299,7 @@ def test_ask_extracts_json_from_markdown(self): # Mock response with markdown-wrapped JSON mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = """Here's the response: ```json { @@ -438,9 +420,7 @@ def test_ask_with_empty_message(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo ''", @@ -463,9 +443,7 @@ def test_multiple_ask_calls_append_messages(self): # Mock the Anthropic client response mock_response = Mock() - mock_response.stop_reason = "end_turn" mock_content = Mock() - mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index aa64fe7..bd1b82d 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -741,4 +741,5 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert valid is False assert llm_response is None assert llm.retries == 1 - assert len(llm.messages) == 1 \ No newline at end of file + assert len(llm.messages) == 1 + assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"] \ No newline at end of file diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 584aa54..6345b67 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -4,14 +4,10 @@ All tests use pytest's tmp_path fixture so they are isolated from the user's real ~/.microbots/memory directory. """ -import sys -import os import pytest from pathlib import Path from unittest.mock import Mock -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src"))) - from microbots.tools.tool_definitions.memory_tool import MemoryTool from microbots.environment.Environment import CmdReturn From ee0b08065f0f6305b6550d4578a491a795e6144b Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 12 Mar 2026 09:47:25 +0000 Subject: [PATCH 16/20] Refactor MemoryTool integration tests: streamline mock responses and add OpenAI tests --- test/bot/test_memory_tool_integration.py | 340 +++++++++++++++-------- 1 file changed, 224 insertions(+), 116 deletions(-) diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py index b20c32b..3c03df5 100644 --- a/test/bot/test_memory_tool_integration.py +++ b/test/bot/test_memory_tool_integration.py @@ -1,27 +1,12 @@ -"""Tests for memory tool end-to-end flow. - -Unit tests (mocked API): - Verify wiring, tool dispatch, and memory file operations - with a mocked Anthropic client. Fast, free, no API key needed. - -Integration tests (real API): - Hit the actual Anthropic API to verify the full round-trip. - Gated behind ``@pytest.mark.anthropic_integration``. - Require ``ANTHROPIC_API_KEY`` in .env. -""" - import json import os -from pathlib import Path from unittest.mock import Mock, patch import pytest -from dotenv import load_dotenv - -load_dotenv() from microbots import MicroBot, BotRunResult -from microbots.llm.llm import llm_output_format_str +from microbots.constants import DOCKER_WORKING_DIR, PermissionLabels +from microbots.extras.mount import Mount from microbots.tools.tool_definitions.memory_tool import MemoryTool @@ -29,8 +14,8 @@ # Helpers # --------------------------------------------------------------------------- -def _make_end_turn_response(task_done, thoughts, command=""): - """Build a mock Anthropic API response with stop_reason='end_turn'.""" +def _make_anthropic_response(task_done, thoughts, command=""): + """Build a mock Anthropic API response.""" payload = json.dumps({ "task_done": task_done, "thoughts": thoughts, @@ -47,13 +32,24 @@ def _make_end_turn_response(task_done, thoughts, command=""): return resp +def _make_openai_response(task_done, thoughts, command=""): + """Build a mock OpenAI API response with output_text.""" + resp = Mock() + resp.output_text = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + return resp + + # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- @pytest.mark.unit class TestMemoryToolWiring: - """Unit tests for provider-agnostic MemoryTool dispatch with a mocked Anthropic client.""" + """Unit tests for MemoryTool dispatch with a mocked Anthropic client.""" @pytest.fixture() def memory_dir(self, tmp_path): @@ -63,9 +59,9 @@ def memory_dir(self, tmp_path): @pytest.fixture() def bot(self, memory_dir): - """Create a MicroBot with Anthropic provider and a MemoryTool. + """Create a MicroBot with a mocked Anthropic provider and a MemoryTool. - The Anthropic client is mocked, but the rest of the stack is real: + The LLM client is mocked, but the rest of the stack is real: tool dispatch and memory file operations. """ tool = MemoryTool( @@ -109,12 +105,12 @@ def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): # 1. ask(task) → API returns JSON command (memory create) # 2. ask(command output) → API returns end_turn (task_done=True) self._mock_client.messages.create.side_effect = [ - _make_end_turn_response( + _make_anthropic_response( task_done=False, thoughts="I'll save a note to memory.", command='memory create /memories/notes.md "Hello from integration test"', ), - _make_end_turn_response( + _make_anthropic_response( task_done=True, thoughts="Saved a note to memory successfully.", ), @@ -144,12 +140,12 @@ def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): (memory_dir / "existing.md").write_text("Previously saved content") self._mock_client.messages.create.side_effect = [ - _make_end_turn_response( + _make_anthropic_response( task_done=False, thoughts="Let me check my memory.", command="memory view /memories/existing.md", ), - _make_end_turn_response( + _make_anthropic_response( task_done=True, thoughts="Found previously saved content in memory.", ), @@ -181,19 +177,19 @@ def test_create_then_view_memory_file(self, bot, memory_dir): """LLM creates a file, then views it — both dispatched via MicroBot loop.""" self._mock_client.messages.create.side_effect = [ # Step 1: create file - _make_end_turn_response( + _make_anthropic_response( task_done=False, thoughts="Creating a todo list.", command='memory create /memories/todo.md "- Fix bug #42\n- Write tests"', ), # Step 2: view file - _make_end_turn_response( + _make_anthropic_response( task_done=False, thoughts="Let me verify what I wrote.", command="memory view /memories/todo.md", ), # Step 3: done - _make_end_turn_response( + _make_anthropic_response( task_done=True, thoughts="Created and verified the todo list.", ), @@ -218,12 +214,12 @@ def test_create_then_view_memory_file(self, bot, memory_dir): def test_non_memory_commands_go_to_environment(self, bot): """Regular shell commands should be dispatched to the environment, not the memory tool.""" self._mock_client.messages.create.side_effect = [ - _make_end_turn_response( + _make_anthropic_response( task_done=False, thoughts="Let me check the files.", command="ls -la", ), - _make_end_turn_response( + _make_anthropic_response( task_done=True, thoughts="Done.", ), @@ -241,11 +237,135 @@ def test_non_memory_commands_go_to_environment(self, bot): # --------------------------------------------------------------------------- -# Real integration tests — require ANTHROPIC_API_KEY +# OpenAI wiring tests (mocked) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolWiringOpenAI: + """Unit tests for MemoryTool dispatch with a mocked OpenAI client.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + openai_deployment = "gpt-4" + + with patch("microbots.llm.openai_api.OpenAI") as mock_openai_cls, \ + patch("microbots.llm.openai_api.api_key", "test-key"), \ + patch("microbots.llm.openai_api.endpoint", "https://api.openai.com"), \ + patch("microbots.llm.openai_api.deployment_name", openai_deployment): + + bot = MicroBot( + model=f"azure-openai/{openai_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_openai_cls.return_value + yield bot + del bot + + def test_memory_tool_is_retained(self, bot): + memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)] + assert len(memory_tools) == 1 + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="I'll save a note to memory.", + command='memory create /memories/notes.md "Hello from OpenAI test"', + ), + _make_openai_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from OpenAI test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + created_file = memory_dir / "notes.md" + assert created_file.exists() + assert created_file.read_text() == "Hello from OpenAI test" + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="Let me check my memory.", + command="memory view /memories/existing.md", + ), + _make_openai_response( + task_done=True, + thoughts="Found previously saved content in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment.""" + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_openai_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# Real integration tests — require Azure OpenAI + Docker # --------------------------------------------------------------------------- -MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool. -You can save and retrieve notes using the memory tool. +from microbots.llm.llm import llm_output_format_str + +def _memory_system_prompt(sandbox_path): + return f"""You are a helpful assistant with access to a shell environment. +The project code is available at {sandbox_path} in your shell environment. All your responses must be in this JSON format: {llm_output_format_str} The properties (task_done, thoughts, command) are mandatory on each response. @@ -253,20 +373,59 @@ def test_non_memory_commands_go_to_environment(self, bot): """ -@pytest.mark.anthropic_integration -@pytest.mark.docker -class TestMemoryToolRealApi: - """End-to-end integration tests that hit the real Anthropic API. +def _create_test_project(base_dir): + """Create a small Python project with deliberate code issues. - These tests exercise the full MicroBot → AnthropicApi → memory tool - pipeline with no mocking. A real Docker environment is created - (matching the AgentBoss integration test pattern). + Returns the path to the project directory. + """ + project = base_dir / "myproject" + project.mkdir() + + # utils.py — unused import 'os' + (project / "utils.py").write_text( + "import os\n" + "import json\n" + "\n" + "\n" + "def parse_config(raw: str) -> dict:\n" + ' """Parse a JSON config string."""\n' + " return json.loads(raw)\n" + ) + + # handler.py — missing null check around line 42 + lines = [f"# handler.py — request handler\n"] + lines += [f"# line {i}\n" for i in range(2, 42)] + lines.append("def handle_request(request):\n") # line 42 + lines.append(" user = request.get('user')\n") # line 43 + lines.append(" return user.name\n") # line 44 — will crash if user is None + (project / "handler.py").write_text("".join(lines)) + + # client.py — deprecated API call around line 88 + lines = [f"# client.py — HTTP client wrapper\n"] + lines += [f"# line {i}\n" for i in range(2, 88)] + lines.append("import urllib.request\n") # line 88 + lines.append("def fetch(url):\n") # line 89 + lines.append(" return urllib.request.urlopen(url).read() # deprecated: use requests\n") + (project / "client.py").write_text("".join(lines)) + + return project + + +@pytest.mark.integration +@pytest.mark.slow +class TestMemoryToolRealIntegration: + """End-to-end integration tests with Azure OpenAI. + + These tests exercise the full MicroBot → LLM → memory tool pipeline + with no mocking. A small Python project with deliberate code issues + is mounted into the Docker sandbox so the bot has real files to work on. Run with:: - pytest -m anthropic_integration + pytest -m integration test/bot/test_memory_tool_integration.py -v - Requires ``ANTHROPIC_API_KEY`` in ``.env``. + Requires ``AZURE_OPENAI_DEPLOYMENT_NAME`` (or defaults to + ``mini-swe-agent-gpt5``) and valid Azure OpenAI credentials. """ @pytest.fixture() @@ -276,102 +435,51 @@ def memory_dir(self, tmp_path): return d @pytest.fixture() - def memory_bot(self, memory_dir): - """Create a MicroBot with the real Anthropic API, real Docker env, - and a MemoryTool. No mocking — fully end-to-end. - """ + def test_project(self, tmp_path): + return _create_test_project(tmp_path) + + @pytest.fixture() + def memory_bot(self, memory_dir, test_project): tool = MemoryTool( memory_dir=str(memory_dir), - usage_instructions_to_llm="Use the memory tool to persist notes.", ) - anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5") + model = f"azure-openai/{os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'mini-swe-agent-gpt5')}" + + sandbox_path = f"{DOCKER_WORKING_DIR}/{test_project.name}" + folder_mount = Mount( + str(test_project), + sandbox_path, + PermissionLabels.READ_ONLY, + ) bot = MicroBot( - model=f"anthropic/{anthropic_deployment}", - system_prompt=MEMORY_SYSTEM_PROMPT, + model=model, + system_prompt=_memory_system_prompt(sandbox_path), additional_tools=[tool], + folder_to_mount=folder_mount, ) yield bot del bot - def test_memory_tool_is_retained(self, memory_bot): - """MemoryTool should remain attached for Anthropic just like other providers.""" - memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, MemoryTool)] - assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached" - def test_create_memory_file(self, memory_bot, memory_dir): - """MicroBot should persist a debugging plan to memory. - - The LLM is expected to: - 1. Receive a task about planning a debugging session. - 2. Decide to persist the plan using the memory tool. - 3. Confirm the task is done. - - We verify the plan was actually written to disk. - """ + """MicroBot should use memory tool while reviewing code for issues.""" result: BotRunResult = memory_bot.run( task=( - "You are investigating a bug where the server returns HTTP 500 " - "on POST /api/users. Create a debugging plan that includes: " - "1) check server logs, 2) reproduce the request with curl, " - "3) inspect the database connection. " - "Persist this plan so you can resume later if interrupted." + "Review the Python project for code quality issues. " + "Check each .py file for unused imports, missing error handling, " + "and deprecated API usage. Report all issues you find." ), - max_iterations=10, - timeout_in_seconds=60, + max_iterations=15, + timeout_in_seconds=300, ) assert result.status is True, f"Task failed: {result.error}" assert result.error is None - # The LLM should have used the memory tool to persist the plan saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] assert len(saved_files) >= 1, ( f"Expected at least one file created in memory. " f"Found: {saved_files}" ) - combined_content = "\n".join(f.read_text() for f in saved_files).lower() - assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, ( - f"Expected debugging plan content in memory files. Content: {combined_content}" - ) - - def test_create_and_view_roundtrip(self, memory_bot, memory_dir): - """MicroBot should save findings and then review them before reporting. - - The LLM is expected to: - 1. Record analysis findings using the memory tool. - 2. Review what it recorded to verify nothing was missed. - 3. Summarize the findings in its final thoughts. - - We verify: - - At least one file was written to disk. - - The LLM's summary references the recorded findings. - """ - result: BotRunResult = memory_bot.run( - task=( - "You analyzed a Python project and found these issues: " - "1) an unused import 'os' in utils.py, " - "2) a missing null check in handler.py line 42. " - "Record these findings, then review your notes and " - "summarize what you found in your final thoughts." - ), - max_iterations=15, - timeout_in_seconds=60, - ) - - assert result.status is True, f"Task failed: {result.error}" - assert result.error is None - - # The LLM should have created at least one memory file - saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] - assert len(saved_files) >= 1, ( - f"Expected at least one file in memory. " - f"Found: {list(memory_dir.rglob('*'))}" - ) - - result_lower = result.result.lower() - assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, ( - f"LLM should have summarized the findings. Got: {result.result}" - ) From fc45c0e524c6b83cd59ec9288dec58b948f007c8 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Thu, 12 Mar 2026 10:38:52 +0000 Subject: [PATCH 17/20] Enhance MemoryTool: integrate argparse for command parsing and improve subcommand handling --- .../tools/tool_definitions/memory_tool.py | 233 +++++++++--------- .../tool_definitions/test_memory_tool.py | 113 ++++----- 2 files changed, 170 insertions(+), 176 deletions(-) diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index c584f30..5f0c7fc 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -1,3 +1,4 @@ +import argparse import logging import os import shlex @@ -12,6 +13,13 @@ logger = logging.getLogger(" 🧠 MemoryTool") + +class _NoExitArgumentParser(argparse.ArgumentParser): + """ArgumentParser that raises ``ValueError`` instead of calling ``sys.exit``.""" + + def error(self, message: str) -> None: # type: ignore[override] + raise ValueError(message) + INSTRUCTIONS_TO_LLM = """ Use this tool to persist information to files across steps — same interface as the Anthropic memory tool. All paths must be under /memories/. @@ -101,6 +109,42 @@ def __post_init__(self): base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory" self._memory_dir = base self._memory_dir.mkdir(parents=True, exist_ok=True) + self._parser = self._build_parser() + + def _build_parser(self) -> _NoExitArgumentParser: + """Build the argparse parser with subparsers for each memory subcommand.""" + parser = _NoExitArgumentParser(prog="memory", add_help=False) + subs = parser.add_subparsers(dest="subcommand") + + p_view = subs.add_parser("view", add_help=False) + p_view.add_argument("path") + p_view.add_argument("--start", type=int, default=None) + p_view.add_argument("--end", type=int, default=None) + + p_create = subs.add_parser("create", add_help=False) + p_create.add_argument("path") + p_create.add_argument("content", nargs=argparse.REMAINDER) + + p_str = subs.add_parser("str_replace", add_help=False) + p_str.add_argument("path") + p_str.add_argument("--old", required=True) + p_str.add_argument("--new", required=True) + + p_ins = subs.add_parser("insert", add_help=False) + p_ins.add_argument("path") + p_ins.add_argument("--line", type=int, required=True) + p_ins.add_argument("--text", required=True) + + p_del = subs.add_parser("delete", add_help=False) + p_del.add_argument("path") + + p_ren = subs.add_parser("rename", add_help=False) + p_ren.add_argument("old_path") + p_ren.add_argument("new_path") + + subs.add_parser("clear", add_help=False) + + return parser # ---------------------------------------------------------------------- # # Path helpers @@ -138,7 +182,8 @@ def _resolve(self, path: str) -> Path: # ---------------------------------------------------------------------- # def is_invoked(self, command: str) -> bool: - return command.strip().startswith("memory ") + cmd = command.strip() + return cmd == "memory" or cmd.startswith("memory ") def invoke(self, command: str, parent_bot) -> CmdReturn: try: @@ -146,30 +191,28 @@ def invoke(self, command: str, parent_bot) -> CmdReturn: except ValueError as exc: return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1) - if len(tokens) < 2: + try: + args = self._parser.parse_args(tokens[1:]) # skip "memory" + except ValueError as exc: + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + if args.subcommand is None: return CmdReturn(stdout="", stderr="Usage: memory ...", return_code=1) - sub = tokens[1] - args = tokens[2:] + dispatch = { + "view": self._view, + "create": self._create, + "str_replace": self._str_replace, + "insert": self._insert, + "delete": self._delete, + "rename": self._rename, + } try: - if sub == "view": - return self._view(args) - elif sub == "create": - return self._create(args) - elif sub == "str_replace": - return self._str_replace(args) - elif sub == "insert": - return self._insert(args) - elif sub == "delete": - return self._delete(args) - elif sub == "rename": - return self._rename(args) - elif sub == "clear": + if args.subcommand == "clear": return self._clear() - else: - return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1) - except (ValueError, FileNotFoundError, RuntimeError) as exc: + return dispatch[args.subcommand](args) + except (OSError, ValueError, RuntimeError, UnicodeDecodeError) as exc: logger.error("🧠 MemoryTool error: %s", exc) return CmdReturn(stdout="", stderr=str(exc), return_code=1) @@ -177,26 +220,10 @@ def invoke(self, command: str, parent_bot) -> CmdReturn: # Subcommand handlers # ---------------------------------------------------------------------- # - def _view(self, args: list) -> CmdReturn: - if not args: - return CmdReturn(stdout="", stderr="Usage: memory view [--start N] [--end N]", return_code=1) - - path = args[0] - start_line = None - end_line = None - i = 1 - while i < len(args): - if args[i] == "--start" and i + 1 < len(args): - start_line = int(args[i + 1]); i += 2 - elif args[i] == "--end" and i + 1 < len(args): - end_line = int(args[i + 1]); i += 2 - else: - logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i]) - i += 1 - - resolved = self._resolve(path) + def _view(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) if not resolved.exists(): - return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) + return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1) if resolved.is_dir(): items = [ @@ -204,13 +231,13 @@ def _view(self, args: list) -> CmdReturn: for item in sorted(resolved.iterdir()) if not item.name.startswith(".") ] - result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items) + result = f"Directory: {args.path}\n" + "\n".join(f"- {i}" for i in items) return CmdReturn(stdout=result, stderr="", return_code=0) lines = resolved.read_text(encoding="utf-8").splitlines() - if start_line is not None or end_line is not None: - s = max(0, (start_line or 1) - 1) - e = len(lines) if (end_line is None or end_line == -1) else end_line + if args.start is not None or args.end is not None: + s = max(0, (args.start or 1) - 1) + e = len(lines) if (args.end is None or args.end == -1) else args.end lines = lines[s:e] base_num = s + 1 else: @@ -218,101 +245,65 @@ def _view(self, args: list) -> CmdReturn: numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines)) return CmdReturn(stdout=numbered, stderr="", return_code=0) - def _create(self, args: list) -> CmdReturn: - if len(args) < 2: + def _create(self, args: argparse.Namespace) -> CmdReturn: + if not args.content: return CmdReturn(stdout="", stderr="Usage: memory create ", return_code=1) - path, content = args[0], args[1] - resolved = self._resolve(path) + content = " ".join(args.content) + resolved = self._resolve(args.path) resolved.parent.mkdir(parents=True, exist_ok=True) resolved.write_text(content, encoding="utf-8") - logger.info("🧠 Memory file created: %s", path) - return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0) - - def _str_replace(self, args: list) -> CmdReturn: - if not args: - return CmdReturn(stdout="", stderr="Usage: memory str_replace --old --new ", return_code=1) - path = args[0] - old_text = new_text = None - i = 1 - while i < len(args): - if args[i] == "--old" and i + 1 < len(args): - old_text = args[i + 1]; i += 2 - elif args[i] == "--new" and i + 1 < len(args): - new_text = args[i + 1]; i += 2 - else: - logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i]) - i += 1 - if old_text is None or new_text is None: - return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) - resolved = self._resolve(path) + logger.info("🧠 Memory file created: %s", args.path) + return CmdReturn(stdout=f"File created: {args.path}", stderr="", return_code=0) + + def _str_replace(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) if not resolved.is_file(): - return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) + return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1) content = resolved.read_text(encoding="utf-8") - count = content.count(old_text) + count = content.count(args.old) if count == 0: - return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1) + return CmdReturn(stdout="", stderr=f"Text not found in {args.path!r}", return_code=1) if count > 1: - return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1) - resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") - return CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) - - def _insert(self, args: list) -> CmdReturn: - if not args: - return CmdReturn(stdout="", stderr="Usage: memory insert --line N --text ", return_code=1) - path = args[0] - line_num = text = None - i = 1 - while i < len(args): - if args[i] == "--line" and i + 1 < len(args): - line_num = int(args[i + 1]); i += 2 - elif args[i] == "--text" and i + 1 < len(args): - text = args[i + 1]; i += 2 - else: - logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i]) - i += 1 - if line_num is None or text is None: - return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) - resolved = self._resolve(path) + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} — must be unique", return_code=1) + resolved.write_text(content.replace(args.old, args.new, 1), encoding="utf-8") + return CmdReturn(stdout=f"File {args.path} edited.", stderr="", return_code=0) + + def _insert(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) if not resolved.is_file(): - return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) - lines = resolved.read_text(encoding="utf-8").splitlines() - if line_num < 0 or line_num > len(lines): - return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1) - lines.insert(line_num, text.rstrip("\n")) - resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") - return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) - - def _delete(self, args: list) -> CmdReturn: - if not args: - return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) - path = args[0] - if path.rstrip("/") in ("/memories", "memories", ""): + return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1) + file_lines = resolved.read_text(encoding="utf-8").splitlines() + if args.line < 0 or args.line > len(file_lines): + return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0–{len(file_lines)}.", return_code=1) + file_lines.insert(args.line, args.text.rstrip("\n")) + resolved.write_text("\n".join(file_lines) + "\n", encoding="utf-8") + return CmdReturn(stdout=f"Text inserted at line {args.line} in {args.path}.", stderr="", return_code=0) + + def _delete(self, args: argparse.Namespace) -> CmdReturn: + if args.path.rstrip("/") in ("/memories", "memories", ""): return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) - resolved = self._resolve(path) + resolved = self._resolve(args.path) if resolved.is_file(): resolved.unlink() - logger.info("🧠 Memory file deleted: %s", path) - return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0) + logger.info("🧠 Memory file deleted: %s", args.path) + return CmdReturn(stdout=f"Deleted: {args.path}", stderr="", return_code=0) if resolved.is_dir(): shutil.rmtree(resolved) - logger.info("🧠 Memory directory deleted: %s", path) - return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) - return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) - - def _rename(self, args: list) -> CmdReturn: - if len(args) < 2: - return CmdReturn(stdout="", stderr="Usage: memory rename ", return_code=1) - old_path, new_path = args[0], args[1] - old_resolved = self._resolve(old_path) - new_resolved = self._resolve(new_path) + logger.info("🧠 Memory directory deleted: %s", args.path) + return CmdReturn(stdout=f"Deleted directory: {args.path}", stderr="", return_code=0) + return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1) + + def _rename(self, args: argparse.Namespace) -> CmdReturn: + old_resolved = self._resolve(args.old_path) + new_resolved = self._resolve(args.new_path) if not old_resolved.exists(): - return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1) + return CmdReturn(stdout="", stderr=f"Source not found: {args.old_path!r}", return_code=1) if new_resolved.exists(): - return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1) + return CmdReturn(stdout="", stderr=f"Destination already exists: {args.new_path!r}", return_code=1) new_resolved.parent.mkdir(parents=True, exist_ok=True) old_resolved.rename(new_resolved) - logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) - return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0) + logger.info("🧠 Memory renamed: %s → %s", args.old_path, args.new_path) + return CmdReturn(stdout=f"Renamed {args.old_path} to {args.new_path}.", stderr="", return_code=0) def _clear(self) -> CmdReturn: if self._memory_dir.exists(): diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 6345b67..1ff1df6 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -5,6 +5,7 @@ user's real ~/.microbots/memory directory. """ import pytest +from argparse import Namespace from pathlib import Path from unittest.mock import Mock @@ -56,6 +57,12 @@ def test_returns_true_for_memory_commands(self, tmp_path): assert tool.is_invoked("memory view /memories") is True assert tool.is_invoked("memory create /memories/f.md hello") is True + def test_returns_true_for_bare_memory(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("memory") is True + assert tool.is_invoked("memory\n") is True + assert tool.is_invoked(" memory ") is True + def test_returns_false_for_other_commands(self, tmp_path): tool = make_tool(tmp_path) assert tool.is_invoked("ls -la") is False @@ -134,7 +141,7 @@ def test_view_directory_lists_contents(self, tmp_path): (tool._memory_dir / "notes.md").write_text("hello") (tool._memory_dir / "sub").mkdir() - result = tool._view(["/memories"]) + result = tool._view(Namespace(path="/memories", start=None, end=None)) assert result.return_code == 0 assert "notes.md" in result.stdout @@ -144,7 +151,7 @@ def test_view_file_returns_numbered_lines(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n") - result = tool._view(["/memories/f.md"]) + result = tool._view(Namespace(path="/memories/f.md", start=None, end=None)) assert result.return_code == 0 assert "1:" in result.stdout @@ -155,7 +162,7 @@ def test_view_file_with_line_range(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") - result = tool._view(["/memories/f.md", "--start", "2", "--end", "4"]) + result = tool._view(Namespace(path="/memories/f.md", start=2, end=4)) assert result.return_code == 0 assert "b" in result.stdout @@ -166,22 +173,21 @@ def test_view_file_with_line_range(self, tmp_path): def test_view_nonexistent_path_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._view(["/memories/nonexistent.md"]) + result = tool._view(Namespace(path="/memories/nonexistent.md", start=None, end=None)) assert result.return_code != 0 def test_view_no_args_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._view([]) + result = tool.invoke("memory view", parent_bot=Mock()) assert result.return_code != 0 - def test_view_unknown_flag_is_skipped(self, tmp_path): - """else: i += 1 — unrecognised flags are silently skipped.""" + def test_view_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("hello\n") - result = tool._view(["/memories/f.md", "--bogus", "value"]) - assert result.return_code == 0 - assert "hello" in result.stdout + result = tool.invoke("memory view /memories/f.md --bogus value", parent_bot=Mock()) + assert result.return_code != 0 # --------------------------------------------------------------------------- @@ -194,7 +200,7 @@ class TestMemoryToolCreate: def test_create_writes_file(self, tmp_path): tool = make_tool(tmp_path) - result = tool._create(["/memories/notes.md", "hello world"]) + result = tool._create(Namespace(path="/memories/notes.md", content=["hello world"])) assert result.return_code == 0 assert (tool._memory_dir / "notes.md").read_text() == "hello world" @@ -203,7 +209,7 @@ def test_create_overwrites_existing_file(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("old content") - result = tool._create(["/memories/f.md", "new content"]) + result = tool._create(Namespace(path="/memories/f.md", content=["new content"])) assert result.return_code == 0 assert (tool._memory_dir / "f.md").read_text() == "new content" @@ -211,14 +217,14 @@ def test_create_overwrites_existing_file(self, tmp_path): def test_create_creates_parent_directories(self, tmp_path): tool = make_tool(tmp_path) - result = tool._create(["/memories/sub/dir/f.md", "content"]) + result = tool._create(Namespace(path="/memories/sub/dir/f.md", content=["content"])) assert result.return_code == 0 assert (tool._memory_dir / "sub" / "dir" / "f.md").exists() def test_create_missing_args_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._create(["/memories/f.md"]) # missing content + result = tool.invoke("memory create /memories/f.md", parent_bot=Mock()) # missing content assert result.return_code != 0 @@ -233,7 +239,7 @@ def test_str_replace_replaces_unique_text(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("hello world") - result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "goodbye"]) + result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="goodbye")) assert result.return_code == 0 assert (tool._memory_dir / "f.md").read_text() == "goodbye world" @@ -242,7 +248,7 @@ def test_str_replace_fails_when_text_not_found(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("hello world") - result = tool._str_replace(["/memories/f.md", "--old", "nothere", "--new", "x"]) + result = tool._str_replace(Namespace(path="/memories/f.md", old="nothere", new="x")) assert result.return_code != 0 assert "not found" in result.stderr.lower() @@ -251,7 +257,7 @@ def test_str_replace_fails_when_text_not_unique(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("hello hello") - result = tool._str_replace(["/memories/f.md", "--old", "hello", "--new", "bye"]) + result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="bye")) assert result.return_code != 0 assert "2" in result.stderr # appears N times @@ -259,30 +265,29 @@ def test_str_replace_fails_when_text_not_unique(self, tmp_path): def test_str_replace_missing_flags_returns_error(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("text") - result = tool._str_replace(["/memories/f.md"]) + result = tool.invoke("memory str_replace /memories/f.md", parent_bot=Mock()) assert result.return_code != 0 def test_str_replace_empty_args_returns_usage_error(self, tmp_path): - """if not args branch: calling _str_replace([]) returns the usage message.""" + """Calling str_replace with no args returns an error via argparse.""" tool = make_tool(tmp_path) - result = tool._str_replace([]) + result = tool.invoke("memory str_replace", parent_bot=Mock()) assert result.return_code == 1 - assert "Usage: memory str_replace" in result.stderr def test_str_replace_nonexistent_file_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._str_replace(["/memories/missing.md", "--old", "a", "--new", "b"]) + result = tool._str_replace(Namespace(path="/memories/missing.md", old="a", new="b")) assert result.return_code != 0 - def test_str_replace_unknown_flag_is_skipped(self, tmp_path): - """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + def test_str_replace_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("hello world") - result = tool._str_replace([ - "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye" - ]) - assert result.return_code == 0 - assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + result = tool.invoke( + 'memory str_replace /memories/f.md --bogus ignored --old "hello" --new "goodbye"', + parent_bot=Mock(), + ) + assert result.return_code != 0 # --------------------------------------------------------------------------- @@ -296,7 +301,7 @@ def test_insert_prepends_at_line_zero(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\nline2\n") - result = tool._insert(["/memories/f.md", "--line", "0", "--text", "prepended"]) + result = tool._insert(Namespace(path="/memories/f.md", line=0, text="prepended")) assert result.return_code == 0 lines = (tool._memory_dir / "f.md").read_text().splitlines() @@ -307,7 +312,7 @@ def test_insert_at_end_of_file(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\nline2\n") - result = tool._insert(["/memories/f.md", "--line", "2", "--text", "appended"]) + result = tool._insert(Namespace(path="/memories/f.md", line=2, text="appended")) assert result.return_code == 0 lines = (tool._memory_dir / "f.md").read_text().splitlines() @@ -317,38 +322,36 @@ def test_insert_invalid_line_number_returns_error(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\n") - result = tool._insert(["/memories/f.md", "--line", "99", "--text", "x"]) + result = tool._insert(Namespace(path="/memories/f.md", line=99, text="x")) assert result.return_code != 0 def test_insert_nonexistent_file_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._insert(["/memories/missing.md", "--line", "0", "--text", "x"]) + result = tool._insert(Namespace(path="/memories/missing.md", line=0, text="x")) assert result.return_code != 0 def test_insert_missing_flags_returns_error(self, tmp_path): tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\n") - result = tool._insert(["/memories/f.md"]) + result = tool.invoke("memory insert /memories/f.md", parent_bot=Mock()) assert result.return_code != 0 def test_insert_empty_args_returns_usage_error(self, tmp_path): - """if not args branch: calling _insert([]) returns the usage message.""" + """Calling insert with no args returns an error via argparse.""" tool = make_tool(tmp_path) - result = tool._insert([]) + result = tool.invoke("memory insert", parent_bot=Mock()) assert result.return_code == 1 - assert "Usage: memory insert" in result.stderr - def test_insert_unknown_flag_is_skipped(self, tmp_path): - """else: i += 1 — unrecognised flags in the arg loop are silently skipped.""" + def test_insert_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" tool = make_tool(tmp_path) (tool._memory_dir / "f.md").write_text("line1\nline2\n") - result = tool._insert([ - "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended" - ]) - assert result.return_code == 0 - lines = (tool._memory_dir / "f.md").read_text().splitlines() - assert lines[0] == "prepended" + result = tool.invoke( + 'memory insert /memories/f.md --bogus ignored --line 0 --text "prepended"', + parent_bot=Mock(), + ) + assert result.return_code != 0 # --------------------------------------------------------------------------- @@ -363,7 +366,7 @@ def test_delete_removes_file(self, tmp_path): f = tool._memory_dir / "f.md" f.write_text("data") - result = tool._delete(["/memories/f.md"]) + result = tool._delete(Namespace(path="/memories/f.md")) assert result.return_code == 0 assert not f.exists() @@ -374,7 +377,7 @@ def test_delete_removes_directory(self, tmp_path): sub.mkdir() (sub / "f.md").write_text("data") - result = tool._delete(["/memories/sub"]) + result = tool._delete(Namespace(path="/memories/sub")) assert result.return_code == 0 assert not sub.exists() @@ -382,17 +385,17 @@ def test_delete_removes_directory(self, tmp_path): def test_delete_prevents_root_deletion(self, tmp_path): tool = make_tool(tmp_path) for path in ("/memories", "memories", "/memories/"): - result = tool._delete([path]) + result = tool._delete(Namespace(path=path)) assert result.return_code != 0 def test_delete_nonexistent_path_raises(self, tmp_path): tool = make_tool(tmp_path) - result = tool._delete(["/memories/nonexistent.md"]) + result = tool._delete(Namespace(path="/memories/nonexistent.md")) assert result.return_code != 0 def test_delete_no_args_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._delete([]) + result = tool.invoke("memory delete", parent_bot=Mock()) assert result.return_code != 0 @@ -408,7 +411,7 @@ def test_rename_moves_file(self, tmp_path): src = tool._memory_dir / "old.md" src.write_text("content") - result = tool._rename(["/memories/old.md", "/memories/new.md"]) + result = tool._rename(Namespace(old_path="/memories/old.md", new_path="/memories/new.md")) assert result.return_code == 0 assert not src.exists() @@ -416,7 +419,7 @@ def test_rename_moves_file(self, tmp_path): def test_rename_nonexistent_source_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._rename(["/memories/missing.md", "/memories/new.md"]) + result = tool._rename(Namespace(old_path="/memories/missing.md", new_path="/memories/new.md")) assert result.return_code != 0 def test_rename_fails_if_destination_exists(self, tmp_path): @@ -424,13 +427,13 @@ def test_rename_fails_if_destination_exists(self, tmp_path): (tool._memory_dir / "a.md").write_text("a") (tool._memory_dir / "b.md").write_text("b") - result = tool._rename(["/memories/a.md", "/memories/b.md"]) + result = tool._rename(Namespace(old_path="/memories/a.md", new_path="/memories/b.md")) assert result.return_code != 0 def test_rename_missing_args_returns_error(self, tmp_path): tool = make_tool(tmp_path) - result = tool._rename(["/memories/a.md"]) + result = tool.invoke("memory rename /memories/a.md", parent_bot=Mock()) assert result.return_code != 0 @@ -510,7 +513,7 @@ def test_invoke_unknown_subcommand_returns_error(self, tmp_path): tool = make_tool(tmp_path) result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock()) assert result.return_code != 0 - assert "Unknown subcommand" in result.stderr + assert "invalid choice" in result.stderr def test_invoke_too_few_tokens_returns_error(self, tmp_path): tool = make_tool(tmp_path) From 3a9592d3b570ff56b3b8d01dfe9291464fbb1f87 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Fri, 13 Mar 2026 02:00:33 +0000 Subject: [PATCH 18/20] resolve copilot comments --- .../tools/tool_definitions/memory_tool.py | 15 ++-- .../tool_definitions/test_memory_tool.py | 76 ++++++++++++++++--- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 5f0c7fc..599137a 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -158,17 +158,15 @@ def _resolve(self, path: str) -> Path: if ".." in Path(stripped).parts: raise ValueError(f"Path traversal not allowed: {path!r}") - if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"): + if stripped != "memories" and not stripped.startswith("memories/"): raise ValueError( - f"Invalid memory path: {path!r}. Use paths under /memories/." + f"Invalid memory path: {path!r}. Paths must start with /memories/." ) if stripped == "memories": rel = "" - elif stripped.startswith("memories/"): - rel = stripped[len("memories/"):] else: - rel = stripped # treat as relative to memory_dir + rel = stripped[len("memories/"):] resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() # Use trailing separator to prevent prefix confusion with sibling dirs @@ -283,6 +281,8 @@ def _delete(self, args: argparse.Namespace) -> CmdReturn: if args.path.rstrip("/") in ("/memories", "memories", ""): return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) resolved = self._resolve(args.path) + if resolved == self._memory_dir.resolve(): + return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) if resolved.is_file(): resolved.unlink() logger.info("🧠 Memory file deleted: %s", args.path) @@ -296,6 +296,11 @@ def _delete(self, args: argparse.Namespace) -> CmdReturn: def _rename(self, args: argparse.Namespace) -> CmdReturn: old_resolved = self._resolve(args.old_path) new_resolved = self._resolve(args.new_path) + memory_root = self._memory_dir.resolve() + if old_resolved == memory_root: + return CmdReturn(stdout="", stderr="Cannot rename the /memories root directory", return_code=1) + if new_resolved == memory_root: + return CmdReturn(stdout="", stderr="Cannot overwrite the /memories root directory", return_code=1) if not old_resolved.exists(): return CmdReturn(stdout="", stderr=f"Source not found: {args.old_path!r}", return_code=1) if new_resolved.exists(): diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 1ff1df6..e7de107 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -114,19 +114,13 @@ def test_resolve_rejects_non_memory_paths(self, tmp_path): with pytest.raises(ValueError): tool._resolve(bad) - def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path): - """The else branch: a path without a /memories/ prefix is resolved - relative to memory_dir.""" + def test_resolve_rejects_bare_relative_paths(self, tmp_path): + """Bare relative paths without /memories/ prefix must be rejected + to match the documented contract.""" tool = make_tool(tmp_path) - resolved = tool._resolve("notes.md") - assert resolved == (tool._memory_dir / "notes.md").resolve() - - def test_resolve_bare_relative_subdir_path(self, tmp_path): - """A bare relative path with subdirectory components is also resolved - relative to memory_dir (else branch).""" - tool = make_tool(tmp_path) - resolved = tool._resolve("sub/dir/file.md") - assert resolved == (tool._memory_dir / "sub" / "dir" / "file.md").resolve() + for bad in ("notes.md", "sub/dir/file.md", "file.txt"): + with pytest.raises(ValueError, match="Paths must start with /memories/"): + tool._resolve(bad) # --------------------------------------------------------------------------- @@ -388,6 +382,28 @@ def test_delete_prevents_root_deletion(self, tmp_path): result = tool._delete(Namespace(path=path)) assert result.return_code != 0 + @pytest.mark.parametrize("path", [ + "/memories/.", # dot resolves to root + "/memories/./", # trailing slash variant + "//memories", # double leading slash + "//memories/", # double leading slash + trailing slash + "memories/.", # relative with dot + "memories/./", # relative with dot + trailing slash + ]) + def test_delete_prevents_root_deletion_normalized_paths(self, tmp_path, path): + """Root-equivalent paths that bypass the simple string guard must + still be rejected so that the entire memory directory is never + wiped out via ``_delete``.""" + tool = make_tool(tmp_path) + # Seed the directory so we can verify it survives + (tool._memory_dir / "keep.md").write_text("important") + + result = tool._delete(Namespace(path=path)) + + assert result.return_code != 0, f"path {path!r} was not rejected" + assert tool._memory_dir.exists(), f"path {path!r} deleted the memory root" + assert (tool._memory_dir / "keep.md").exists(), f"path {path!r} wiped memory contents" + def test_delete_nonexistent_path_raises(self, tmp_path): tool = make_tool(tmp_path) result = tool._delete(Namespace(path="/memories/nonexistent.md")) @@ -436,6 +452,42 @@ def test_rename_missing_args_returns_error(self, tmp_path): result = tool.invoke("memory rename /memories/a.md", parent_bot=Mock()) assert result.return_code != 0 + @pytest.mark.parametrize("old_path", [ + "/memories", + "/memories/", + "/memories/.", + "//memories", + "memories/.", + ]) + def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path): + """Renaming the memory root itself must be rejected.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "keep.md").write_text("important") + + result = tool._rename(Namespace(old_path=old_path, new_path="/memories/newdir")) + + assert result.return_code != 0, f"old_path {old_path!r} was not rejected" + assert tool._memory_dir.exists(), f"old_path {old_path!r} moved the memory root" + assert (tool._memory_dir / "keep.md").exists() + + @pytest.mark.parametrize("new_path", [ + "/memories", + "/memories/", + "/memories/.", + "//memories", + "memories/.", + ]) + def test_rename_prevents_overwriting_root_as_destination(self, tmp_path, new_path): + """Renaming onto the memory root must be rejected.""" + tool = make_tool(tmp_path) + src = tool._memory_dir / "src.md" + src.write_text("data") + + result = tool._rename(Namespace(old_path="/memories/src.md", new_path=new_path)) + + assert result.return_code != 0, f"new_path {new_path!r} was not rejected" + assert src.exists() + # --------------------------------------------------------------------------- # _clear From 8904e347cab3e418b3de8ea81050c7cbde913ff4 Mon Sep 17 00:00:00 2001 From: KavyaSree2610 <92566732+KavyaSree2610@users.noreply.github.com> Date: Fri, 13 Mar 2026 07:47:12 +0530 Subject: [PATCH 19/20] Remove unused import Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- test/tools/tool_definitions/test_memory_tool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index e7de107..579ca23 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -10,7 +10,6 @@ from unittest.mock import Mock from microbots.tools.tool_definitions.memory_tool import MemoryTool -from microbots.environment.Environment import CmdReturn # --------------------------------------------------------------------------- From 77578bddeeb45ff6c0c919711382ceba25ec8200 Mon Sep 17 00:00:00 2001 From: Kavya Sree Kaitepalli Date: Fri, 13 Mar 2026 08:27:09 +0000 Subject: [PATCH 20/20] Refactor MemoryTool documentation and validation: improve path validation messages and update usage instructions --- .../tools/tool_definitions/memory_tool.py | 27 +++++++------------ .../tool_definitions/test_memory_tool.py | 8 ++---- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py index 599137a..0dd5ec2 100644 --- a/src/microbots/tools/tool_definitions/memory_tool.py +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -21,8 +21,8 @@ def error(self, message: str) -> None: # type: ignore[override] raise ValueError(message) INSTRUCTIONS_TO_LLM = """ -Use this tool to persist information to files across steps — same interface as -the Anthropic memory tool. All paths must be under /memories/. +Use this tool to persist information to files across steps. +All paths must be under /memories/. MEMORY PROTOCOL: 1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for @@ -86,16 +86,6 @@ class MemoryTool(ExternalTool): All files are stored under ``memory_dir`` on the host (default ``~/.microbots/memory``). The LLM uses paths like ``/memories/notes.md`` which are resolved relative to ``memory_dir``. - - Supported subcommands - --------------------- - memory view [--start N] [--end N] - memory create - memory str_replace --old --new - memory insert --line N --text - memory delete - memory rename - memory clear """ name: str = Field(default="memory") @@ -152,6 +142,11 @@ def _build_parser(self) -> _NoExitArgumentParser: def _resolve(self, path: str) -> Path: """Resolve a /memories/… path to an absolute host path.""" + if not path.startswith("/"): + raise ValueError( + f"Invalid memory path: {path!r}. Paths must start with /memories/." + ) + stripped = path.lstrip("/") # Reject any path containing '..' components before resolving @@ -262,9 +257,9 @@ def _str_replace(self, args: argparse.Namespace) -> CmdReturn: if count == 0: return CmdReturn(stdout="", stderr=f"Text not found in {args.path!r}", return_code=1) if count > 1: - return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} — must be unique", return_code=1) + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} - must be unique", return_code=1) resolved.write_text(content.replace(args.old, args.new, 1), encoding="utf-8") - return CmdReturn(stdout=f"File {args.path} edited.", stderr="", return_code=0) + return CmdReturn(stdout=f"File {args.path} has been edited.", stderr="", return_code=0) def _insert(self, args: argparse.Namespace) -> CmdReturn: resolved = self._resolve(args.path) @@ -272,14 +267,12 @@ def _insert(self, args: argparse.Namespace) -> CmdReturn: return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1) file_lines = resolved.read_text(encoding="utf-8").splitlines() if args.line < 0 or args.line > len(file_lines): - return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0–{len(file_lines)}.", return_code=1) + return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0 - {len(file_lines)}.", return_code=1) file_lines.insert(args.line, args.text.rstrip("\n")) resolved.write_text("\n".join(file_lines) + "\n", encoding="utf-8") return CmdReturn(stdout=f"Text inserted at line {args.line} in {args.path}.", stderr="", return_code=0) def _delete(self, args: argparse.Namespace) -> CmdReturn: - if args.path.rstrip("/") in ("/memories", "memories", ""): - return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) resolved = self._resolve(args.path) if resolved == self._memory_dir.resolve(): return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py index 579ca23..e47bc49 100644 --- a/test/tools/tool_definitions/test_memory_tool.py +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -117,7 +117,7 @@ def test_resolve_rejects_bare_relative_paths(self, tmp_path): """Bare relative paths without /memories/ prefix must be rejected to match the documented contract.""" tool = make_tool(tmp_path) - for bad in ("notes.md", "sub/dir/file.md", "file.txt"): + for bad in ("notes.md", "sub/dir/file.md", "file.txt", "memories/foo.md"): with pytest.raises(ValueError, match="Paths must start with /memories/"): tool._resolve(bad) @@ -377,7 +377,7 @@ def test_delete_removes_directory(self, tmp_path): def test_delete_prevents_root_deletion(self, tmp_path): tool = make_tool(tmp_path) - for path in ("/memories", "memories", "/memories/"): + for path in ("/memories", "/memories/"): result = tool._delete(Namespace(path=path)) assert result.return_code != 0 @@ -386,8 +386,6 @@ def test_delete_prevents_root_deletion(self, tmp_path): "/memories/./", # trailing slash variant "//memories", # double leading slash "//memories/", # double leading slash + trailing slash - "memories/.", # relative with dot - "memories/./", # relative with dot + trailing slash ]) def test_delete_prevents_root_deletion_normalized_paths(self, tmp_path, path): """Root-equivalent paths that bypass the simple string guard must @@ -456,7 +454,6 @@ def test_rename_missing_args_returns_error(self, tmp_path): "/memories/", "/memories/.", "//memories", - "memories/.", ]) def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path): """Renaming the memory root itself must be rejected.""" @@ -474,7 +471,6 @@ def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path): "/memories/", "/memories/.", "//memories", - "memories/.", ]) def test_rename_prevents_overwriting_root_as_destination(self, tmp_path, new_path): """Renaming onto the memory root must be rejected."""