diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml index 08341d0..ae10709 100644 --- a/.github/workflows/dockerBuildPush.yml +++ b/.github/workflows/dockerBuildPush.yml @@ -1,6 +1,7 @@ name: Build and Push to Docker Hub on: + workflow_dispatch: push: paths: - 'src/microbots/environment/local_docker/image_builder/Dockerfile' diff --git a/pytest.ini b/pytest.ini index d0e4f1f..80758ec 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,5 +11,6 @@ addopts = markers = unit: Unit tests integration: Integration tests + anthropic_integration: Integration tests requiring a real Anthropic API key slow: Slow tests docker: marks tests that require a running Docker daemon and pull container images diff --git a/src/microbots/MicroBot.py b/src/microbots/MicroBot.py index 3ace5a4..f2708b1 100644 --- a/src/microbots/MicroBot.py +++ b/src/microbots/MicroBot.py @@ -335,7 +335,9 @@ def _create_llm(self): ) elif self.model_provider == ModelProvider.ANTHROPIC: self.llm = AnthropicApi( - system_prompt=system_prompt_with_tools, deployment_name=self.deployment_name + system_prompt=system_prompt_with_tools, + deployment_name=self.deployment_name, + additional_tools=self.additional_tools, ) # No Else case required as model provider is already validated using _validate_model_and_provider diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index f40118a..a403f9c 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -1,7 +1,9 @@ import json import os +import re from dataclasses import asdict from logging import getLogger +from typing import List, Optional from dotenv import load_dotenv from anthropic import Anthropic @@ -16,9 +18,49 @@ api_key = os.getenv("ANTHROPIC_API_KEY") + class AnthropicApi(LLMInterface): - def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3): + def upgrade_tools(self, tools: list) -> list: + """Replace ``MemoryTool`` with 
``AnthropicMemoryTool`` for native tool-use.""" + from microbots.tools.tool_definitions.memory_tool import MemoryTool + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + upgraded = [] + for tool in tools: + if isinstance(tool, MemoryTool) and not isinstance(tool, AnthropicMemoryTool): + logger.info( + "\U0001f9e0 Auto-upgrading MemoryTool \u2192 AnthropicMemoryTool for Anthropic provider" + ) + upgraded.append(AnthropicMemoryTool( + memory_dir=tool.memory_dir, + usage_instructions=tool.usage_instructions_to_llm, + )) + else: + upgraded.append(tool) + return upgraded + + def __init__( + self, + system_prompt: str, + deployment_name: str = deployment_name, + max_retries: int = 3, + additional_tools: Optional[List] = None, + ): + """ + Parameters + ---------- + system_prompt : str + System prompt for the LLM. + deployment_name : str + The Anthropic model deployment name. + max_retries : int + Maximum number of retries for invalid LLM responses. + additional_tools : Optional[List] + Tool objects passed from MicroBot. Any provider-agnostic tools + (e.g. ``MemoryTool``) are silently upgraded to their Anthropic- + native variants, and their API schemas are extracted. + """ self.ai_client = Anthropic( api_key=api_key, base_url=endpoint @@ -27,30 +69,112 @@ def __init__(self, system_prompt, deployment_name=deployment_name, max_retries=3 self.system_prompt = system_prompt self.messages = [] + # Silently upgrade tools in-place and extract API schemas + tools = additional_tools or [] + upgraded = self.upgrade_tools(tools) + # Mutate the original list so the caller (MicroBot) sees upgraded tools + if additional_tools is not None: + additional_tools[:] = upgraded + self._tool_dicts = [ + t.to_dict() for t in upgraded + if callable(getattr(t, "to_dict", None)) + ] + self._pending_tool_response = None + # Set these values here. This logic will be handled in the parent class. 
self.max_retries = max_retries self.retries = 0 - def ask(self, message) -> LLMAskResponse: + # ---------------------------------------------------------------------- # + # Internal helpers + # ---------------------------------------------------------------------- # + + def _call_api(self) -> object: + """Call the Anthropic messages API, including tool definitions when present.""" + kwargs = dict( + model=self.deployment_name, + system=self.system_prompt, + messages=self.messages, + max_tokens=4096, + ) + + if self._tool_dicts: + kwargs["tools"] = self._tool_dicts + + return self.ai_client.messages.create(**kwargs) + + def _append_tool_result(self, response, result_text: str) -> None: + """Append the assistant tool_use turn and the corresponding tool_result user turn. + + Called when the caller provides the tool execution result via + the next ``ask()`` call. + """ + assistant_content = [block.model_dump() for block in response.content] + self.messages.append({"role": "assistant", "content": assistant_content}) + + tool_results = [] + for block in response.content: + if block.type != "tool_use": + continue + tool_results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": str(result_text), + }) + + self.messages.append({"role": "user", "content": tool_results}) + + # ---------------------------------------------------------------------- # + # Public interface + # ---------------------------------------------------------------------- # + + def ask(self, message: str) -> LLMAskResponse: self.retries = 0 # reset retries for each ask. Handled in parent class. - self.messages.append({"role": "user", "content": message}) + if self._pending_tool_response: + # Previous response was tool_use — format this message as tool results. 
+ self._append_tool_result(self._pending_tool_response, message) + self._pending_tool_response = None + else: + self.messages.append({"role": "user", "content": message}) valid = False while not valid: - response = self.ai_client.messages.create( - model=self.deployment_name, - system=self.system_prompt, - messages=self.messages, - max_tokens=4096, - ) - - # Extract text content from response - response_text = response.content[0].text if response.content else "" + response = self._call_api() + + if response.stop_reason == "tool_use": + # Return tool call info as an LLMAskResponse so the + # caller (MicroBot.run) can dispatch the tool. + self._pending_tool_response = response + + thoughts = "" + for block in response.content: + if block.type == "text": + thoughts = block.text + break + + tool_calls = [] + for block in response.content: + if block.type == "tool_use": + tool_calls.append({ + "name": block.name, + "id": block.id, + "input": block.input, + }) + + command = json.dumps({"native_tool_calls": tool_calls}) + return LLMAskResponse(task_done=False, thoughts=thoughts, command=command) + + # Extract text content from the final response + response_text = "" + for block in response.content: + if block.type == "text": + response_text = block.text + break + logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500]) # Try to extract JSON if wrapped in markdown code blocks - import re json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL) if json_match: response_text = json_match.group(1) diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py index 2800790..e7e5c22 100644 --- a/src/microbots/llm/llm.py +++ b/src/microbots/llm/llm.py @@ -29,6 +29,15 @@ def ask(self, message: str) -> LLMAskResponse: def clear_history(self) -> bool: pass + def upgrade_tools(self, tools: list) -> list: + """Upgrade tools for the specific LLM provider. + + The default implementation is a no-op. Subclasses (e.g. 
+ ``AnthropicApi``) override this to swap provider-agnostic tools + with their native equivalents. + """ + return tools + def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]: if self.retries >= self.max_retries: diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py new file mode 100644 index 0000000..88acf0d --- /dev/null +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -0,0 +1,2 @@ +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool \ No newline at end of file diff --git a/src/microbots/tools/tool_definitions/anthropic_memory_tool.py b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py new file mode 100644 index 0000000..ef26182 --- /dev/null +++ b/src/microbots/tools/tool_definitions/anthropic_memory_tool.py @@ -0,0 +1,213 @@ +""" +AnthropicMemoryTool — wraps Anthropic's memory tool. + +The memory tool lets the model persist information across conversations by +reading and writing files in a local memory directory. When the model invokes +the tool, it sends a command (view, create, str_replace, insert, delete, +rename) and the client executes it against a local filesystem directory. + +This implementation extends both: + - ``MemoryTool``: provides all file-operation logic (_resolve, _view, + _create, _str_replace, _insert, _delete, _rename, _clear) and satisfies + the ``ToolAbstract`` ABC (install_tool, verify_tool_installation, etc.). + - ``BetaAbstractMemoryTool`` (SDK): provides native Anthropic dispatch and + the ``to_dict()`` / ``call()`` interface required by AnthropicApi. + +The SDK command-handler overrides (view, create, str_replace, insert, delete, +rename) simply translate SDK command objects → arg lists and delegate to the +inherited MemoryTool private methods, converting the CmdReturn back to a +string as the SDK expects. 
+ +The memory tool (type ``memory_20250818``) is available in the standard +Anthropic library and does not require a beta endpoint or header. Pass it +via ``tools=[{"type": "memory_20250818", "name": "memory"}]`` on a regular +``client.messages.create(...)`` call. ``MicroBot`` auto-upgrades +``MemoryTool`` to ``AnthropicMemoryTool`` for Anthropic providers and +passes the tool schema to ``AnthropicApi`` via ``tool_dicts``. + +Usage: + from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + memory = AnthropicMemoryTool() + bot = ReadingBot(..., additional_tools=[memory]) +""" + +from __future__ import annotations + +import json +from logging import getLogger +from pathlib import Path + +from typing_extensions import override + +from anthropic.lib.tools import BetaAbstractMemoryTool as _SDKMemoryTool +from anthropic.types.beta import ( + BetaMemoryTool20250818CreateCommand, + BetaMemoryTool20250818DeleteCommand, + BetaMemoryTool20250818InsertCommand, + BetaMemoryTool20250818RenameCommand, + BetaMemoryTool20250818StrReplaceCommand, + BetaMemoryTool20250818ViewCommand, +) + +from microbots.environment.Environment import CmdReturn +from microbots.tools.tool_definitions.memory_tool import MemoryTool + +logger = getLogger(__name__) + +DEFAULT_MEMORY_INSTRUCTIONS = ( + "MEMORY PROTOCOL:\n" + "1. ALWAYS view your memory directory BEFORE doing anything else " + "using the `view` command of your `memory` tool to check for earlier progress.\n" + "2. As you make progress on the task, record status, progress, " + "and key findings in your memory using the memory tool.\n" + "3. ASSUME INTERRUPTION: Your context window might be reset at any moment, " + "so you risk losing any progress that is not recorded in your memory directory.\n" + "4. Before completing a task, always save your final results and analysis to memory.\n" + "5. When editing your memory folder, always keep its content up-to-date, coherent " + "and organized. 
Rename or delete files that are no longer relevant. " + "Do not create new files unless necessary.\n\n" + "IMPORTANT: The memory tool ONLY works with paths under /memories/. " + "Do NOT use the memory tool to access the repository or workdir. " + "Use shell commands (ls, cat, etc.) for filesystem access." +) + + +class AnthropicMemoryTool(MemoryTool, _SDKMemoryTool): + """ + Anthropic's built-in memory tool, backed by MemoryTool's file logic. + + Inherits file-operation logic from ``MemoryTool`` (plain Python class) and + the SDK's native dispatch interface from ``BetaAbstractMemoryTool``. + + The SDK command-handler overrides delegate to the inherited private methods + (``_view``, ``_create``, etc.), translating the SDK ``Command`` objects to + the ``args: list`` format that those methods expect, and converting the + returned ``CmdReturn`` to the string that the SDK API requires. + + Parameters + ---------- + memory_dir : str | Path | None + Root directory for memory files. Defaults to ``~/.microbots/memory``. + usage_instructions : str | None + Custom instructions appended to the system prompt for the LLM. + Defaults to ``DEFAULT_MEMORY_INSTRUCTIONS``. 
+ """ + + def __init__( + self, + memory_dir: str | Path | None = None, + usage_instructions: str | None = None, + ) -> None: + MemoryTool.__init__( + self, + memory_dir=str(memory_dir) if memory_dir else None, + usage_instructions_to_llm=( + usage_instructions + if usage_instructions is not None + else DEFAULT_MEMORY_INSTRUCTIONS + ), + ) + _SDKMemoryTool.__init__(self) # type: ignore[call-arg] + + # ---------------------------------------------------------------------- # + # ToolAbstract overrides + # ---------------------------------------------------------------------- # + + def is_model_supported(self, model_name: str) -> bool: + """Only Anthropic (Claude) models support the native memory tool.""" + return "claude" in model_name.lower() + + def is_invoked(self, command: str) -> bool: + """Return True when the command is a serialized native_tool_calls JSON + containing a call to the ``memory`` tool.""" + try: + data = json.loads(command) + if "native_tool_calls" in data: + return any(tc["name"] == "memory" for tc in data["native_tool_calls"]) + except (json.JSONDecodeError, KeyError, TypeError): + pass + return False + + def invoke(self, command: str, parent_bot) -> CmdReturn: + """Execute all memory tool calls in the serialized native_tool_calls batch.""" + data = json.loads(command) + results = [] + for tc in data["native_tool_calls"]: + if tc["name"] != "memory": + continue + try: + result = self.call(tc["input"]) + logger.info( + "\U0001f9e0 Native tool 'memory' executed. 
Result (first 200 chars): %s", + str(result)[:200], + ) + results.append(str(result)) + except Exception as exc: + logger.error("Native tool 'memory' raised: %s", exc) + results.append(f"Error executing tool 'memory': {exc}") + combined = "\n".join(results) + return CmdReturn(stdout=combined, stderr="", return_code=0) + + def clear_all(self) -> None: + """Delete all memory files (useful for testing or resetting state).""" + self._clear() + logger.info("🧠 AnthropicMemoryTool: memory cleared at %s", self._memory_dir) + + # ---------------------------------------------------------------------- # + # BetaAbstractMemoryTool overrides — delegate to MemoryTool private methods + # ---------------------------------------------------------------------- # + + @override + def clear_all_memory(self) -> str: + self.clear_all() + return "All memory cleared" + + @override + def view(self, command: BetaMemoryTool20250818ViewCommand) -> str: + args = [command.path] + if command.view_range: + args += ["--start", str(command.view_range[0]), "--end", str(command.view_range[1])] + result = self._view(args) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def create(self, command: BetaMemoryTool20250818CreateCommand) -> str: + result = self._create([command.path, command.file_text]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File created successfully at {command.path}" + + @override + def str_replace(self, command: BetaMemoryTool20250818StrReplaceCommand) -> str: + result = self._str_replace([command.path, "--old", command.old_str, "--new", command.new_str]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return f"File {command.path} has been edited" + + @override + def insert(self, command: BetaMemoryTool20250818InsertCommand) -> str: + result = self._insert([ + command.path, + "--line", str(command.insert_line), + "--text", command.insert_text, + ]) + if result.return_code != 
0: + raise RuntimeError(result.stderr) + return f"Text inserted at line {command.insert_line} in {command.path}" + + @override + def delete(self, command: BetaMemoryTool20250818DeleteCommand) -> str: + result = self._delete([command.path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout + + @override + def rename(self, command: BetaMemoryTool20250818RenameCommand) -> str: + result = self._rename([command.old_path, command.new_path]) + if result.return_code != 0: + raise RuntimeError(result.stderr) + return result.stdout diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py new file mode 100644 index 0000000..8131b8b --- /dev/null +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -0,0 +1,321 @@ +import logging +import os +import shlex +import shutil +from pathlib import Path +from typing import Optional + +from pydantic.dataclasses import dataclass, Field + +from microbots.environment.Environment import CmdReturn +from microbots.tools.external_tool import ExternalTool + +logger = logging.getLogger(" 🧠 MemoryTool") + +INSTRUCTIONS_TO_LLM = """ +Use this tool to persist information to files across steps — same interface as +the Anthropic memory tool. All paths must be under /memories/. + +MEMORY PROTOCOL: +1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for + earlier progress. +2. Record status, findings and intermediate results as you go. +3. Before completing a task, save your final results to memory. +4. Keep the memory folder organised — rename or delete stale files. 
+ +## Commands + +View a file or list a directory: + memory view + memory view --start --end + +Create a file: + memory create + +Replace a unique string in a file: + memory str_replace --old "" --new "" + +Insert a line into a file (0 = prepend): + memory insert --line --text "" + +Delete a file or directory: + memory delete + +Rename / move a file: + memory rename + +Clear all memory: + memory clear + +## Examples + + memory view /memories + memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42" + memory str_replace /memories/progress.md --old "line 42" --new "line 45" + memory insert /memories/progress.md --line 0 --text "# Task Notes" + memory view /memories/progress.md --start 1 --end 10 + memory delete /memories/old_notes.md + memory rename /memories/draft.md /memories/final.md + +## Notes +- Paths must start with /memories/. +- memory create overwrites if the file already exists. +- memory str_replace requires the old text to appear exactly once. +""" + + +@dataclass +class MemoryTool(ExternalTool): + """ + File-backed memory tool that mirrors the ``AnthropicMemoryTool`` interface + but dispatches through the text command loop (compatible with all providers). + + Subclass of ``ExternalTool`` — all command lists are empty so + ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and + ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``. + + All files are stored under ``memory_dir`` on the host (default + ``~/.microbots/memory``). The LLM uses paths like ``/memories/notes.md`` + which are resolved relative to ``memory_dir``. + + Supported subcommands + --------------------- + memory view [--start N] [--end N] + memory create + memory str_replace --old --new + memory insert --line N --text + memory delete + memory rename + memory clear + """ + + name: str = Field(default="memory") + description: str = Field( + default="File-backed memory store — view, create, edit, delete files under /memories/." 
+ ) + usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM) + memory_dir: Optional[str] = Field(default=None) + + def __post_init__(self): + base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory" + self._memory_dir = base + self._memory_dir.mkdir(parents=True, exist_ok=True) + + # ---------------------------------------------------------------------- # + # Path helpers + # ---------------------------------------------------------------------- # + + def _resolve(self, path: str) -> Path: + """Resolve a /memories/… path to an absolute host path.""" + stripped = path.lstrip("/") + + # Reject any path containing '..' components before resolving + if ".." in Path(stripped).parts: + raise ValueError(f"Path traversal not allowed: {path!r}") + + if path.startswith("/") and stripped != "memories" and not stripped.startswith("memories/"): + raise ValueError( + f"Invalid memory path: {path!r}. Use paths under /memories/." + ) + + if stripped == "memories": + rel = "" + elif stripped.startswith("memories/"): + rel = stripped[len("memories/"):] + else: + rel = stripped # treat as relative to memory_dir + + resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() + # Use trailing separator to prevent prefix confusion with sibling dirs + memory_root = str(self._memory_dir.resolve()) + if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep): + raise ValueError(f"Path traversal not allowed: {path!r}") + return resolved + + # ---------------------------------------------------------------------- # + # ToolAbstract interface + # ---------------------------------------------------------------------- # + + def is_invoked(self, command: str) -> bool: + return command.strip().startswith("memory ") + + def invoke(self, command: str, parent_bot) -> CmdReturn: + try: + tokens = shlex.split(command) + except ValueError as exc: + return CmdReturn(stdout="", stderr=f"Parse 
error: {exc}", return_code=1) + + if len(tokens) < 2: + return CmdReturn(stdout="", stderr="Usage: memory ...", return_code=1) + + sub = tokens[1] + args = tokens[2:] + + try: + if sub == "view": + return self._view(args) + elif sub == "create": + return self._create(args) + elif sub == "str_replace": + return self._str_replace(args) + elif sub == "insert": + return self._insert(args) + elif sub == "delete": + return self._delete(args) + elif sub == "rename": + return self._rename(args) + elif sub == "clear": + return self._clear() + else: + return CmdReturn(stdout="", stderr=f"Unknown subcommand: {sub!r}", return_code=1) + except (ValueError, FileNotFoundError, RuntimeError) as exc: + logger.error("🧠 MemoryTool error: %s", exc) + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + # ---------------------------------------------------------------------- # + # Subcommand handlers + # ---------------------------------------------------------------------- # + + def _view(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory view [--start N] [--end N]", return_code=1) + + path = args[0] + start_line = None + end_line = None + i = 1 + while i < len(args): + if args[i] == "--start" and i + 1 < len(args): + start_line = int(args[i + 1]); i += 2 + elif args[i] == "--end" and i + 1 < len(args): + end_line = int(args[i + 1]); i += 2 + else: + logger.warning("🧠 MemoryTool view: unknown flag %r (skipped)", args[i]) + i += 1 + + resolved = self._resolve(path) + if not resolved.exists(): + return CmdReturn(stdout="", stderr=f"Path not found: {path!r}", return_code=1) + + if resolved.is_dir(): + items = [ + (f"{item.name}/" if item.is_dir() else item.name) + for item in sorted(resolved.iterdir()) + if not item.name.startswith(".") + ] + result = f"Directory: {path}\n" + "\n".join(f"- {i}" for i in items) + return CmdReturn(stdout=result, stderr="", return_code=0) + + lines = resolved.read_text(encoding="utf-8").splitlines() 
+ if start_line is not None or end_line is not None: + s = max(0, (start_line or 1) - 1) + e = len(lines) if (end_line is None or end_line == -1) else end_line + lines = lines[s:e] + base_num = s + 1 + else: + base_num = 1 + numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines)) + return CmdReturn(stdout=numbered, stderr="", return_code=0) + + def _create(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory create ", return_code=1) + path, content = args[0], args[1] + resolved = self._resolve(path) + resolved.parent.mkdir(parents=True, exist_ok=True) + resolved.write_text(content, encoding="utf-8") + logger.info("🧠 Memory file created: %s", path) + return CmdReturn(stdout=f"File created: {path}", stderr="", return_code=0) + + def _str_replace(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory str_replace --old --new ", return_code=1) + path = args[0] + old_text = new_text = None + i = 1 + while i < len(args): + if args[i] == "--old" and i + 1 < len(args): + old_text = args[i + 1]; i += 2 + elif args[i] == "--new" and i + 1 < len(args): + new_text = args[i + 1]; i += 2 + else: + logger.warning("🧠 MemoryTool str_replace: unknown flag %r (skipped)", args[i]) + i += 1 + if old_text is None or new_text is None: + return CmdReturn(stdout="", stderr="--old and --new are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) + content = resolved.read_text(encoding="utf-8") + count = content.count(old_text) + if count == 0: + return CmdReturn(stdout="", stderr=f"Text not found in {path!r}", return_code=1) + if count > 1: + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {path!r} — must be unique", return_code=1) + resolved.write_text(content.replace(old_text, new_text, 1), encoding="utf-8") + return 
CmdReturn(stdout=f"File {path} edited.", stderr="", return_code=0) + + def _insert(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory insert --line N --text ", return_code=1) + path = args[0] + line_num = text = None + i = 1 + while i < len(args): + if args[i] == "--line" and i + 1 < len(args): + line_num = int(args[i + 1]); i += 2 + elif args[i] == "--text" and i + 1 < len(args): + text = args[i + 1]; i += 2 + else: + logger.warning("🧠 MemoryTool insert: unknown flag %r (skipped)", args[i]) + i += 1 + if line_num is None or text is None: + return CmdReturn(stdout="", stderr="--line and --text are required", return_code=1) + resolved = self._resolve(path) + if not resolved.is_file(): + return CmdReturn(stdout="", stderr=f"File not found: {path!r}", return_code=1) + lines = resolved.read_text(encoding="utf-8").splitlines() + if line_num < 0 or line_num > len(lines): + return CmdReturn(stdout="", stderr=f"Invalid line number {line_num}. Must be 0–{len(lines)}.", return_code=1) + lines.insert(line_num, text.rstrip("\n")) + resolved.write_text("\n".join(lines) + "\n", encoding="utf-8") + return CmdReturn(stdout=f"Text inserted at line {line_num} in {path}.", stderr="", return_code=0) + + def _delete(self, args: list) -> CmdReturn: + if not args: + return CmdReturn(stdout="", stderr="Usage: memory delete ", return_code=1) + path = args[0] + if path.rstrip("/") in ("/memories", "memories", ""): + return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) + resolved = self._resolve(path) + if resolved.is_file(): + resolved.unlink() + logger.info("🧠 Memory file deleted: %s", path) + return CmdReturn(stdout=f"Deleted: {path}", stderr="", return_code=0) + if resolved.is_dir(): + shutil.rmtree(resolved) + logger.info("🧠 Memory directory deleted: %s", path) + return CmdReturn(stdout=f"Deleted directory: {path}", stderr="", return_code=0) + return CmdReturn(stdout="", stderr=f"Path not found: 
{path!r}", return_code=1) + + def _rename(self, args: list) -> CmdReturn: + if len(args) < 2: + return CmdReturn(stdout="", stderr="Usage: memory rename ", return_code=1) + old_path, new_path = args[0], args[1] + old_resolved = self._resolve(old_path) + new_resolved = self._resolve(new_path) + if not old_resolved.exists(): + return CmdReturn(stdout="", stderr=f"Source not found: {old_path!r}", return_code=1) + if new_resolved.exists(): + return CmdReturn(stdout="", stderr=f"Destination already exists: {new_path!r}", return_code=1) + new_resolved.parent.mkdir(parents=True, exist_ok=True) + old_resolved.rename(new_resolved) + logger.info("🧠 Memory renamed: %s → %s", old_path, new_path) + return CmdReturn(stdout=f"Renamed {old_path} to {new_path}.", stderr="", return_code=0) + + def _clear(self) -> CmdReturn: + if self._memory_dir.exists(): + shutil.rmtree(self._memory_dir) + self._memory_dir.mkdir(parents=True, exist_ok=True) + logger.info("🧠 Memory cleared.") + return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0) diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py new file mode 100644 index 0000000..25c717d --- /dev/null +++ b/test/bot/test_memory_tool_integration.py @@ -0,0 +1,439 @@ +"""Tests for the Anthropic memory tool end-to-end flow. + +Unit tests (mocked API): + Verify wiring — auto-upgrade, tool dispatch, and memory file operations + with a mocked Anthropic client. Fast, free, no API key needed. + +Integration tests (real API): + Hit the actual Anthropic API to verify the full round-trip. + Gated behind ``@pytest.mark.anthropic_integration``. + Require ``ANTHROPIC_API_KEY`` in .env. 
+""" + +import json +import os +import sys +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest +from dotenv import load_dotenv + +load_dotenv() + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) +) + +from microbots import MicroBot, BotRunResult +from microbots.llm.llm import llm_output_format_str +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_tool_use_response(tool_name, tool_id, tool_input, thinking_text=""): + """Build a mock Anthropic API response with stop_reason='tool_use'.""" + blocks = [] + + if thinking_text: + text_block = Mock() + text_block.type = "text" + text_block.text = thinking_text + blocks.append(text_block) + + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = tool_name + tool_block.id = tool_id + tool_block.input = tool_input + tool_block.model_dump = Mock(return_value={ + "type": "tool_use", + "id": tool_id, + "name": tool_name, + "input": tool_input, + }) + blocks.append(tool_block) + + resp = Mock() + resp.stop_reason = "tool_use" + resp.content = blocks + return resp + + +def _make_end_turn_response(task_done, thoughts, command=""): + """Build a mock Anthropic API response with stop_reason='end_turn'.""" + payload = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + + text_block = Mock() + text_block.type = "text" + text_block.text = payload + + resp = Mock() + resp.stop_reason = "end_turn" + resp.content = [text_block] + return resp + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + 
+@pytest.mark.unit +class TestMemoryToolWiring: + """Unit tests — mocked Anthropic client, real tool dispatch and file ops.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + """Create a MicroBot with Anthropic provider and a MemoryTool. + + The Anthropic client is mocked, but the rest of the stack is real: + auto-upgrade, tool dispatch, and memory file operations. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + anthropic_deployment = "claude-sonnet-4-5" + + with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \ + patch("microbots.llm.anthropic_api.api_key", "test-key"), \ + patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \ + patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment): + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_anthropic_cls.return_value + yield bot + del bot + + # -- Upgrade verification ----------------------------------------------- + + def test_memory_tool_auto_upgraded_to_anthropic_variant(self, bot): + """MemoryTool passed to MicroBot should be auto-upgraded to AnthropicMemoryTool.""" + upgraded_tools = bot.additional_tools + memory_tools = [t for t in upgraded_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_tool_dicts_include_memory_schema(self, bot): + """The LLM should have received the memory tool schema.""" + assert len(bot.llm._tool_dicts) == 1 + assert bot.llm._tool_dicts[0]["type"] == "memory_20250818" + + # -- Create file via 
tool_use ------------------------------------------- + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + # Sequence: + # 1. ask(task) → API returns tool_use (memory create) + # 2. ask(tool_result) → API returns end_turn (task_done=True) + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_001", + tool_input={ + "command": "create", + "path": "/memories/notes.md", + "file_text": "Hello from integration test", + }, + thinking_text="I'll save a note to memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from integration test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # Verify the file was actually created on disk + # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md + created_file = memory_dir / "notes.md" + assert created_file.exists(), f"Expected {created_file} to be created" + assert created_file.read_text() == "Hello from integration test" + + # -- View file via tool_use --------------------------------------------- + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + # Pre-create a file in memory + # _resolve("/memories/existing.md") → memory_dir/existing.md + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.messages.create.side_effect = [ + _make_tool_use_response( + tool_name="memory", + tool_id="tool_002", + tool_input={ + "command": "view", + "path": "/memories/existing.md", + }, + thinking_text="Let me check my memory.", + ), + _make_end_turn_response( + task_done=True, + thoughts="Found previously saved content 
in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + # Verify the view result was passed back to the API as tool_result + calls = self._mock_client.messages.create.call_args_list + assert len(calls) == 2 + # The second call should have messages including the tool_result + second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", []) + tool_result_msgs = [ + m for m in second_call_messages + if m.get("role") == "user" and isinstance(m.get("content"), list) + and any(c.get("type") == "tool_result" for c in m["content"]) + ] + assert len(tool_result_msgs) >= 1, "Expected a tool_result message in the second API call" + # The tool_result content should contain the file content + tool_result_content = tool_result_msgs[-1]["content"][0]["content"] + assert "Previously saved content" in tool_result_content + + # -- Multiple tool calls in sequence ------------------------------------ + + def test_create_then_view_memory_file(self, bot, memory_dir): + """LLM creates a file, then views it — both dispatched via MicroBot loop.""" + self._mock_client.messages.create.side_effect = [ + # Step 1: create file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_003", + tool_input={ + "command": "create", + "path": "/memories/todo.md", + "file_text": "- Fix bug #42\n- Write tests", + }, + thinking_text="Creating a todo list.", + ), + # Step 2: view file + _make_tool_use_response( + tool_name="memory", + tool_id="tool_004", + tool_input={ + "command": "view", + "path": "/memories/todo.md", + }, + thinking_text="Let me verify what I wrote.", + ), + # Step 3: done + _make_end_turn_response( + task_done=True, + thoughts="Created and verified the todo list.", + ), + ] + + result: BotRunResult = bot.run( + "Create a todo list and verify it was saved", + max_iterations=10, + timeout_in_seconds=30, + ) + + assert 
result.status is True + assert result.error is None + + # File should exist with correct content + created_file = memory_dir / "todo.md" + assert created_file.exists() + assert "Fix bug #42" in created_file.read_text() + + # -- Non-memory commands still go to environment ------------------------ + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment, not the memory tool.""" + self._mock_client.messages.create.side_effect = [ + _make_end_turn_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_end_turn_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + # The environment.execute should have been called with "ls -la" + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# Real integration tests — require ANTHROPIC_API_KEY +# --------------------------------------------------------------------------- + +MEMORY_SYSTEM_PROMPT = f"""You are a helpful assistant with access to a memory tool. +You can save and retrieve notes using the memory tool. +All your responses must be in this JSON format: +{llm_output_format_str} +The properties (task_done, thoughts, command) are mandatory on each response. +When you are done, set task_done to true and command to an empty string. +""" + + +@pytest.mark.anthropic_integration +@pytest.mark.docker +class TestMemoryToolRealApi: + """End-to-end integration tests that hit the real Anthropic API. + + These tests exercise the full MicroBot → AnthropicApi → memory tool + pipeline with no mocking. A real Docker environment is created + (matching the AgentBoss integration test pattern). + + Run with:: + + pytest -m anthropic_integration + + Requires ``ANTHROPIC_API_KEY`` in ``.env``. 
+ """ + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def memory_bot(self, memory_dir): + """Create a MicroBot with the real Anthropic API, real Docker env, + and a MemoryTool. No mocking — fully end-to-end. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + anthropic_deployment = os.getenv("ANTHROPIC_DEPLOYMENT_NAME", "claude-sonnet-4-5") + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt=MEMORY_SYSTEM_PROMPT, + additional_tools=[tool], + ) + + yield bot + del bot + + def test_memory_tool_auto_upgraded(self, memory_bot): + """MemoryTool should be silently auto-upgraded to AnthropicMemoryTool.""" + memory_tools = [t for t in memory_bot.additional_tools if isinstance(t, AnthropicMemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one AnthropicMemoryTool after auto-upgrade" + + def test_create_memory_file(self, memory_bot, memory_dir): + """MicroBot should persist a debugging plan to memory. + + The LLM is expected to: + 1. Receive a task about planning a debugging session. + 2. Decide to persist the plan using the memory tool. + 3. Confirm the task is done. + + We verify the plan was actually written to disk. + """ + result: BotRunResult = memory_bot.run( + task=( + "You are investigating a bug where the server returns HTTP 500 " + "on POST /api/users. Create a debugging plan that includes: " + "1) check server logs, 2) reproduce the request with curl, " + "3) inspect the database connection. " + "Persist this plan so you can resume later if interrupted." 
+ ), + max_iterations=10, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have used the memory tool to persist the plan + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file created in memory. " + f"Found: {saved_files}" + ) + combined_content = "\n".join(f.read_text() for f in saved_files).lower() + assert "log" in combined_content or "curl" in combined_content or "database" in combined_content, ( + f"Expected debugging plan content in memory files. Content: {combined_content}" + ) + + def test_create_and_view_roundtrip(self, memory_bot, memory_dir): + """MicroBot should save findings and then review them before reporting. + + The LLM is expected to: + 1. Record analysis findings using the memory tool. + 2. Review what it recorded to verify nothing was missed. + 3. Summarize the findings in its final thoughts. + + We verify: + - At least one file was written to disk. + - The LLM's summary references the recorded findings. + """ + result: BotRunResult = memory_bot.run( + task=( + "You analyzed a Python project and found these issues: " + "1) an unused import 'os' in utils.py, " + "2) a missing null check in handler.py line 42. " + "Record these findings, then review your notes and " + "summarize what you found in your final thoughts." + ), + max_iterations=15, + timeout_in_seconds=60, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + # The LLM should have created at least one memory file + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file in memory. 
" + f"Found: {list(memory_dir.rglob('*'))}" + ) + + result_lower = result.result.lower() + assert "import" in result_lower or "null" in result_lower or "handler" in result_lower, ( + f"LLM should have summarized the findings. Got: {result.result}" + ) diff --git a/test/bot/test_upgrade_tools_for_provider.py b/test/bot/test_upgrade_tools_for_provider.py new file mode 100644 index 0000000..459afd4 --- /dev/null +++ b/test/bot/test_upgrade_tools_for_provider.py @@ -0,0 +1,117 @@ +"""Unit tests for AnthropicApi.upgrade_tools() method. + +These tests verify that plain ``MemoryTool`` instances are automatically +replaced with ``AnthropicMemoryTool`` when using ``AnthropicApi.upgrade_tools``. +""" +import sys +import os +import logging +import pytest +from unittest.mock import patch, Mock + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) + +from microbots.llm.anthropic_api import AnthropicApi +from microbots.tools.tool_definitions.memory_tool import MemoryTool +from microbots.tools.tool_definitions.anthropic_memory_tool import AnthropicMemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _memory_tool(tmp_path, instructions: str = "default instructions") -> MemoryTool: + return MemoryTool( + memory_dir=str(tmp_path / "memory"), + usage_instructions_to_llm=instructions, + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestUpgradeToolsForProvider: + + @pytest.fixture(autouse=True) + def _create_api(self): + with patch("microbots.llm.anthropic_api.Anthropic"): + self.api = AnthropicApi(system_prompt="test") + + # -- AnthropicApi.upgrade_tools: MemoryTool → AnthropicMemoryTool -------- + + def 
test_memory_tool_is_replaced_with_anthropic_variant(self, tmp_path): + tool = _memory_tool(tmp_path) + + upgraded = self.api.upgrade_tools([tool]) + + assert len(upgraded) == 1 + assert isinstance(upgraded[0], AnthropicMemoryTool) + + def test_memory_dir_is_forwarded_to_upgraded_tool(self, tmp_path): + mem_dir = str(tmp_path / "my_memory") + tool = MemoryTool(memory_dir=mem_dir) + + upgraded = self.api.upgrade_tools([tool]) + + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert str(upgraded[0].memory_dir) == mem_dir + + def test_usage_instructions_are_forwarded_to_upgraded_tool(self, tmp_path): + custom_instructions = "custom memory instructions for test" + tool = _memory_tool(tmp_path, instructions=custom_instructions) + + upgraded = self.api.upgrade_tools([tool]) + + assert upgraded[0].usage_instructions_to_llm == custom_instructions + + def test_already_anthropic_memory_tool_is_not_re_upgraded(self, tmp_path): + existing = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory")) + + upgraded = self.api.upgrade_tools([existing]) + + assert len(upgraded) == 1 + assert upgraded[0] is existing + + def test_non_memory_tools_are_kept_unchanged(self, tmp_path): + other_tool = Mock() + other_tool.__class__ = Mock # not a MemoryTool subclass + + upgraded = self.api.upgrade_tools([other_tool]) + + assert len(upgraded) == 1 + assert upgraded[0] is other_tool + + def test_mixed_tool_list_upgrades_only_memory_tools(self, tmp_path): + plain_memory = _memory_tool(tmp_path) + already_upgraded = AnthropicMemoryTool(memory_dir=str(tmp_path / "memory2")) + other_tool = Mock(spec=[]) + + upgraded = self.api.upgrade_tools([plain_memory, already_upgraded, other_tool]) + + assert len(upgraded) == 3 + # first: should have been upgraded + assert isinstance(upgraded[0], AnthropicMemoryTool) + assert upgraded[0] is not plain_memory + # second: already AnthropicMemoryTool, untouched + assert upgraded[1] is already_upgraded + # third: non-memory tool, untouched + assert 
upgraded[2] is other_tool + + def test_empty_tool_list_is_a_no_op(self): + upgraded = self.api.upgrade_tools([]) + + assert upgraded == [] + + def test_logger_info_called_for_each_upgraded_tool(self, tmp_path, caplog): + tool1 = _memory_tool(tmp_path) + tmp_path2 = tmp_path / "sub" + tmp_path2.mkdir() + tool2 = _memory_tool(tmp_path2) + + with caplog.at_level(logging.INFO): + self.api.upgrade_tools([tool1, tool2]) + + upgrade_logs = [r for r in caplog.records if "Auto-upgrading" in r.message] + assert len(upgrade_logs) == 2 diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 674294c..2c11966 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -98,7 +98,9 @@ def test_ask_successful_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo 'hello'", @@ -133,7 +135,9 @@ def test_ask_with_task_done_true(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": True, "command": "", @@ -157,12 +161,16 @@ def test_ask_with_retry_on_invalid_response(self): # Mock the Anthropic client to return invalid then valid response mock_invalid_response = Mock() + mock_invalid_response.stop_reason = "end_turn" mock_invalid_content = Mock() + mock_invalid_content.type = "text" mock_invalid_content.text = "invalid json" mock_invalid_response.content = [mock_invalid_content] mock_valid_response = Mock() + mock_valid_response.stop_reason = "end_turn" mock_valid_content = Mock() + mock_valid_content.type = "text" mock_valid_content.text = json.dumps({ "task_done": False, "command": "ls -la", @@ -193,7 +201,9 @@ def test_ask_appends_user_message(self): # Mock the Anthropic client response mock_response = 
Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -218,7 +228,9 @@ def test_ask_appends_assistant_response_as_json(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo test", @@ -247,7 +259,9 @@ def test_ask_uses_asdict_for_response(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" response_dict = { "task_done": True, "command": "", @@ -277,7 +291,9 @@ def test_ask_resets_retries_to_zero(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "ls", @@ -299,7 +315,9 @@ def test_ask_extracts_json_from_markdown(self): # Mock response with markdown-wrapped JSON mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = """Here's the response: ```json { @@ -420,7 +438,9 @@ def test_ask_with_empty_message(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "echo ''", @@ -443,7 +463,9 @@ def test_multiple_ask_calls_append_messages(self): # Mock the Anthropic client response mock_response = Mock() + mock_response.stop_reason = "end_turn" mock_content = Mock() + mock_content.type = "text" mock_content.text = json.dumps({ "task_done": False, "command": "pwd", @@ -513,6 +535,282 @@ def test_anthropic_api_clear_history_integration(self): assert len(api.messages) == 0 # 
Anthropic doesn't store system in messages +# ============================================================================ +# Tests for native_tools support (new changes) +# ============================================================================ + +@pytest.mark.unit +class TestAnthropicApiToolDictsInit: + """Tests for __init__ tool upgrade and tool_dicts extraction.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_init_without_additional_tools_has_empty_tool_dicts(self): + api = AnthropicApi(system_prompt="test") + + assert api._tool_dicts == [] + + def test_init_with_none_additional_tools_has_empty_tool_dicts(self): + api = AnthropicApi(system_prompt="test", additional_tools=None) + + assert api._tool_dicts == [] + + def test_init_with_tool_having_to_dict_extracts_dicts(self): + tool = Mock() + tool.to_dict.return_value = {"name": "memory", "type": "memory_20250818"} + # Ensure it's not a MemoryTool so upgrade_tools won't touch it + tool.__class__ = Mock + + api = AnthropicApi(system_prompt="test", additional_tools=[tool]) + + assert api._tool_dicts == [{"name": "memory", "type": "memory_20250818"}] + + def test_init_sets_pending_tool_response_to_none(self): + api = AnthropicApi(system_prompt="test") + + assert api._pending_tool_response is None + + +@pytest.mark.unit +class TestAnthropicApiCallApiWithTools: + """Tests for _call_api including/excluding the tools kwarg.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + def test_call_api_without_tools_omits_tools_kwarg(self): + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" not in call_kwargs + + def test_call_api_with_tool_dicts_passes_them(self): + dicts = [{"name": "memory", 
"type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test", deployment_name="claude-3") + api._tool_dicts = dicts + api.messages = [{"role": "user", "content": "hello"}] + api.ai_client.messages.create = Mock(return_value=Mock()) + + api._call_api() + + call_kwargs = api.ai_client.messages.create.call_args[1] + assert "tools" in call_kwargs + assert call_kwargs["tools"] == dicts + + +@pytest.mark.unit +class TestAnthropicApiAppendToolResult: + """Tests for _append_tool_result.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + @staticmethod + def _tool_use_block(name, tool_id="tu_001", input_data=None): + block = Mock() + block.type = "tool_use" + block.name = name + block.id = tool_id + block.input = input_data or {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": name} + return block + + @staticmethod + def _text_block(text="hello"): + block = Mock() + block.type = "text" + block.text = text + block.model_dump.return_value = {"type": "text", "text": text} + return block + + def test_appends_assistant_message_first(self): + api = AnthropicApi(system_prompt="test") + + response = Mock() + response.content = [self._tool_use_block("memory")] + api._append_tool_result(response, "ok") + + assert api.messages[0]["role"] == "assistant" + + def test_appends_tool_result_user_message(self): + api = AnthropicApi(system_prompt="test") + + response = Mock() + response.content = [self._tool_use_block("memory", tool_id="tu_abc")] + api._append_tool_result(response, "file listing") + + user_msg = api.messages[1] + assert user_msg["role"] == "user" + assert user_msg["content"][0]["type"] == "tool_result" + assert user_msg["content"][0]["tool_use_id"] == "tu_abc" + assert user_msg["content"][0]["content"] == "file listing" + + def test_skips_non_tool_use_content_blocks(self): + api = AnthropicApi(system_prompt="test") + + response = Mock() + response.content = [ + self._text_block("thinking..."), 
+ self._tool_use_block("memory", tool_id="tu_only"), + ] + api._append_tool_result(response, "result") + + tool_results = api.messages[1]["content"] + assert len(tool_results) == 1 + assert tool_results[0]["tool_use_id"] == "tu_only" + + def test_handles_multiple_tool_use_blocks(self): + api = AnthropicApi(system_prompt="test") + + response = Mock() + response.content = [ + self._tool_use_block("memory", tool_id="id_1"), + self._tool_use_block("bash", tool_id="id_2"), + ] + api._append_tool_result(response, "combined result") + + results = api.messages[1]["content"] + assert len(results) == 2 + assert results[0]["tool_use_id"] == "id_1" + assert results[0]["content"] == "combined result" + assert results[1]["tool_use_id"] == "id_2" + assert results[1]["content"] == "combined result" + + +@pytest.mark.unit +class TestAnthropicApiAskWithToolUse: + """Tests for ask() returning tool_use as LLMAskResponse and accepting tool results.""" + + @pytest.fixture(autouse=True) + def _use_patch(self, patch_anthropic_config): + pass + + @staticmethod + def _tool_use_response(tool_name, tool_id, input_data=None): + block = Mock() + block.type = "tool_use" + block.name = tool_name + block.id = tool_id + block.input = input_data or {} + block.model_dump.return_value = {"type": "tool_use", "id": tool_id, "name": tool_name} + response = Mock() + response.stop_reason = "tool_use" + response.content = [block] + return response + + @staticmethod + def _text_response(json_dict): + block = Mock() + block.type = "text" + block.text = json.dumps(json_dict) + block.model_dump.return_value = {"type": "text", "text": block.text} + response = Mock() + response.stop_reason = "end_turn" + response.content = [block] + return response + + def test_ask_returns_tool_use_as_ask_response(self): + dicts = [{"name": "memory", "type": "memory_20250818"}] + api = AnthropicApi(system_prompt="test") + api._tool_dicts = dicts + + tool_resp = self._tool_use_response("memory", "tu_1", {"command": "view", "path": 
"/memories"}) + api.ai_client.messages.create = Mock(return_value=tool_resp) + + result = api.ask("do the task") + + assert result.task_done is False + assert '"native_tool_calls"' in result.command + parsed = json.loads(result.command) + assert parsed["native_tool_calls"][0]["name"] == "memory" + assert parsed["native_tool_calls"][0]["id"] == "tu_1" + assert api._pending_tool_response is tool_resp + + def test_ask_stores_pending_tool_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] + + tool_resp = self._tool_use_response("memory", "tu_1") + api.ai_client.messages.create = Mock(return_value=tool_resp) + + api.ask("do it") + + assert api._pending_tool_response is tool_resp + + def test_ask_with_pending_tool_response_formats_tool_result(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] + + tool_resp = self._tool_use_response("memory", "tu_1") + final_resp = self._text_response({"task_done": False, "command": "ls /", "thoughts": ""}) + api.ai_client.messages.create = Mock(side_effect=[tool_resp, final_resp]) + + # First ask — returns tool_use + api.ask("do the task") + + # Second ask — sends tool result, formats as tool_result + result = api.ask("viewed /memories") + + assert result.command == "ls /" + assert api._pending_tool_response is None + + # Check messages contain the tool_result + tool_result_msgs = [ + m for m in api.messages + if m["role"] == "user" and isinstance(m["content"], list) + ] + assert len(tool_result_msgs) == 1 + assert tool_result_msgs[0]["content"][0]["type"] == "tool_result" + assert tool_result_msgs[0]["content"][0]["tool_use_id"] == "tu_1" + + def test_ask_without_tool_use_works_normally(self): + api = AnthropicApi(system_prompt="test") + + final_resp = self._text_response({"task_done": False, "command": "pwd", "thoughts": ""}) + api.ai_client.messages.create = Mock(return_value=final_resp) + + result = api.ask("where am I?") + + assert 
api.ai_client.messages.create.call_count == 1 + assert result.command == "pwd" + assert api._pending_tool_response is None + + def test_ask_extracts_thoughts_from_tool_use_response(self): + api = AnthropicApi(system_prompt="test") + api._tool_dicts = [{"name": "memory"}] + + # Build a tool_use response with a text block for thoughts + text_block = Mock() + text_block.type = "text" + text_block.text = "Let me check memory first" + text_block.model_dump.return_value = {"type": "text", "text": text_block.text} + + tool_block = Mock() + tool_block.type = "tool_use" + tool_block.name = "memory" + tool_block.id = "tu_1" + tool_block.input = {} + tool_block.model_dump.return_value = {"type": "tool_use", "id": "tu_1", "name": "memory"} + + response = Mock() + response.stop_reason = "tool_use" + response.content = [text_block, tool_block] + + api.ai_client.messages.create = Mock(return_value=response) + + result = api.ask("do the task") + + assert result.thoughts == "Let me check memory first" + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index bd1b82d..af5a7d2 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -742,4 +742,32 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert llm_response is None assert llm.retries == 1 assert len(llm.messages) == 1 - assert "When 'task_done' is true, 'command' should be an empty string." 
in llm.messages[0]["content"] \ No newline at end of file + + +@pytest.mark.unit +class TestUpgradeToolsDefault: + """Tests for the default LLMInterface.upgrade_tools no-op implementation.""" + + @pytest.fixture + def llm(self): + return ConcreteLLM() + + def test_returns_same_list(self, llm): + """Default upgrade_tools returns the input list unchanged.""" + tools = ["tool_a", "tool_b"] + result = llm.upgrade_tools(tools) + assert result is tools + + def test_empty_list(self, llm): + """Default upgrade_tools handles an empty list.""" + tools = [] + result = llm.upgrade_tools(tools) + assert result == [] + + def test_preserves_tool_order_and_identity(self, llm): + """Default upgrade_tools does not reorder or copy elements.""" + sentinel = object() + tools = [sentinel, "other"] + result = llm.upgrade_tools(tools) + assert result[0] is sentinel + assert result[1] == "other" \ No newline at end of file diff --git a/test/tools/tool_definitions/test_anthropic_memory_tool.py b/test/tools/tool_definitions/test_anthropic_memory_tool.py new file mode 100644 index 0000000..81db12c --- /dev/null +++ b/test/tools/tool_definitions/test_anthropic_memory_tool.py @@ -0,0 +1,421 @@ +""" +Unit tests for AnthropicMemoryTool. 
# Covers:
#   - __init__: memory_dir / usage_instructions forwarding and defaults
#   - is_model_supported
#   - is_invoked
#   - clear_all / clear_all_memory (SDK override)
#   - SDK overrides: view, create, str_replace, insert, delete, rename
#     (happy-path + RuntimeError on failure)
import logging
import pytest

from anthropic.types.beta import (
    BetaMemoryTool20250818CreateCommand,
    BetaMemoryTool20250818DeleteCommand,
    BetaMemoryTool20250818InsertCommand,
    BetaMemoryTool20250818RenameCommand,
    BetaMemoryTool20250818StrReplaceCommand,
    BetaMemoryTool20250818ViewCommand,
)

from microbots.tools.tool_definitions.anthropic_memory_tool import (
    DEFAULT_MEMORY_INSTRUCTIONS,
    AnthropicMemoryTool,
)
from microbots.tools.tool_definitions.memory_tool import MemoryTool


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def make_tool(tmp_path) -> AnthropicMemoryTool:
    """Build an ``AnthropicMemoryTool`` whose memory dir is ``<tmp_path>/memory``."""
    memory_root = tmp_path / "memory"
    return AnthropicMemoryTool(memory_dir=str(memory_root))


# ---------------------------------------------------------------------------
# __init__
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolInit:
    """Constructor checks: inheritance, memory directory and usage instructions."""

    def test_is_subclass_of_memory_tool(self, tmp_path):
        """An instance must also be a ``MemoryTool``."""
        instance = make_tool(tmp_path)
        assert isinstance(instance, MemoryTool)

    def test_memory_dir_is_forwarded(self, tmp_path):
        """The ``memory_dir`` argument ends up in ``_memory_dir``."""
        requested = str(tmp_path / "my_memory")
        instance = AnthropicMemoryTool(memory_dir=requested)
        assert str(instance._memory_dir) == requested

    def test_memory_dir_is_created_on_init(self, tmp_path):
        """Constructing the tool creates a directory that did not exist before."""
        target = tmp_path / "new_memory"
        assert not target.exists()
        AnthropicMemoryTool(memory_dir=str(target))
        assert target.exists()

    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
        """With no ``memory_dir`` the tool uses ``<home>/.microbots/memory``."""
        from pathlib import Path

        def fake_home():
            return tmp_path

        # Redirect Path.home() so the default lands inside tmp_path.
        monkeypatch.setattr(Path, "home", staticmethod(fake_home))
        instance = AnthropicMemoryTool()
        expected = tmp_path / ".microbots" / "memory"
        assert instance._memory_dir == expected

    def test_custom_usage_instructions_are_stored(self, tmp_path):
        """Explicit ``usage_instructions`` are exposed as ``usage_instructions_to_llm``."""
        custom = "custom instructions"
        kwargs = {
            "memory_dir": str(tmp_path / "memory"),
            "usage_instructions": custom,
        }
        instance = AnthropicMemoryTool(**kwargs)
        assert instance.usage_instructions_to_llm == custom

    def test_default_usage_instructions_are_applied_when_none(self, tmp_path):
        """Without explicit instructions the module default is used."""
        instance = make_tool(tmp_path)
        assert instance.usage_instructions_to_llm == DEFAULT_MEMORY_INSTRUCTIONS


# ---------------------------------------------------------------------------
# is_model_supported
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolIsModelSupported:
    """``is_model_supported`` must return exactly ``True``/``False`` per model name."""

    def test_returns_true_for_claude_models(self, tmp_path):
        """Claude model names (any letter case in the samples) are accepted."""
        instance = make_tool(tmp_path)
        claude_names = ("claude-3-sonnet", "claude-3-5-haiku", "Claude-Opus-4")
        for name in claude_names:
            verdict = instance.is_model_supported(name)
            assert verdict is True

    def test_returns_false_for_non_claude_models(self, tmp_path):
        """Non-Claude names, including the empty string, are rejected."""
        instance = make_tool(tmp_path)
        other_names = ("gpt-4", "ollama/llama3", "azure-openai/gpt-5", "")
        for name in other_names:
            verdict = instance.is_model_supported(name)
            assert verdict is False


# ---------------------------------------------------------------------------
# is_invoked
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolIsInvoked:
    """``is_invoked`` reacts only to JSON payloads with a ``memory`` native tool call."""

    def test_returns_true_for_native_tool_calls_with_memory(self, tmp_path):
        """A payload whose native tool call is named ``memory`` is recognised."""
        import json
        instance = make_tool(tmp_path)
        payload = {"native_tool_calls": [{"name": "memory", "id": "tu_1", "input": {}}]}
        assert instance.is_invoked(json.dumps(payload)) is True

    def test_returns_false_for_native_tool_calls_without_memory(self, tmp_path):
        """A payload that only calls ``bash`` is not a memory invocation."""
        import json
        instance = make_tool(tmp_path)
        payload = {"native_tool_calls": [{"name": "bash", "id": "tu_1", "input": {}}]}
        assert instance.is_invoked(json.dumps(payload)) is False

    def test_returns_false_for_plain_commands(self, tmp_path):
        """Plain text commands, including the empty string, are not invocations."""
        instance = make_tool(tmp_path)
        plain_commands = ("memory view /memories", "memory clear", "anything", "")
        for text in plain_commands:
            assert instance.is_invoked(text) is False


# ---------------------------------------------------------------------------
# clear_all / clear_all_memory
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolClearAll:
    """``clear_all`` and ``clear_all_memory`` empty the memory directory."""

    def test_clear_all_removes_all_files(self, tmp_path):
        """Every file in the memory directory is gone after ``clear_all``."""
        instance = make_tool(tmp_path)
        for filename, content in (("a.md", "a"), ("b.md", "b")):
            (instance._memory_dir / filename).write_text(content)

        instance.clear_all()

        leftovers = list(instance._memory_dir.iterdir())
        assert leftovers == []

    def test_clear_all_leaves_memory_dir_intact(self, tmp_path):
        """The memory directory itself still exists after ``clear_all``."""
        instance = make_tool(tmp_path)
        instance.clear_all()
        assert instance._memory_dir.exists()

    def test_clear_all_logs_info(self, tmp_path, caplog):
        """``clear_all`` logs text containing the tool name and ``cleared``."""
        instance = make_tool(tmp_path)
        with caplog.at_level(logging.INFO):
            instance.clear_all()
        captured = caplog.text
        assert "AnthropicMemoryTool" in captured
        assert "cleared" in captured

    def test_clear_all_memory_returns_string(self, tmp_path):
        """``clear_all_memory`` returns the fixed confirmation string."""
        instance = make_tool(tmp_path)
        message = instance.clear_all_memory()
        assert message == "All memory cleared"

    def test_clear_all_memory_removes_files(self, tmp_path):
        """``clear_all_memory`` also removes stored files."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("data")

        instance.clear_all_memory()

        leftovers = list(instance._memory_dir.iterdir())
        assert leftovers == []


# ---------------------------------------------------------------------------
# view (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolView:
    """``view`` returns file text and raises ``RuntimeError`` on failure."""

    def test_view_returns_file_contents(self, tmp_path):
        """Both lines of the stored file show up in the returned text."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "notes.md").write_text("hello\nworld\n")

        request = BetaMemoryTool20250818ViewCommand(
            command="view", path="/memories/notes.md", view_range=None
        )
        text = instance.view(request)

        for expected in ("hello", "world"):
            assert expected in text

    def test_view_with_view_range(self, tmp_path):
        """With ``view_range=[2, 4]`` lines b..d appear and a / e do not."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")

        request = BetaMemoryTool20250818ViewCommand(
            command="view", path="/memories/f.md", view_range=[2, 4]
        )
        text = instance.view(request)

        assert "b" in text
        assert "d" in text
        assert "a" not in text
        assert "e" not in text

    def test_view_raises_runtime_error_on_failure(self, tmp_path):
        """Viewing a file that does not exist raises ``RuntimeError``."""
        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818ViewCommand(
            command="view", path="/memories/nonexistent.md", view_range=None
        )
        with pytest.raises(RuntimeError):
            instance.view(request)


# ---------------------------------------------------------------------------
# create (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolCreate:
    """``create`` writes the file and raises ``RuntimeError`` on failure."""

    def test_create_writes_file(self, tmp_path):
        """The file is written and the result mentions its name."""
        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818CreateCommand(
            command="create", path="/memories/new.md", file_text="hello world"
        )
        outcome = instance.create(request)

        assert "new.md" in outcome
        written = (instance._memory_dir / "new.md").read_text()
        assert written == "hello world"

    def test_create_raises_runtime_error_on_failure(self, tmp_path):
        """Patch ``_create`` to return a failing ``CmdReturn`` and expect
        ``create`` to raise ``RuntimeError`` matching its stderr text."""
        from unittest.mock import patch
        from microbots.environment.Environment import CmdReturn

        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818CreateCommand(
            command="create", path="/memories/new.md", file_text="x"
        )
        failure = CmdReturn(stdout="", stderr="disk full", return_code=1)
        with patch.object(instance, "_create", return_value=failure), \
                pytest.raises(RuntimeError, match="disk full"):
            instance.create(request)
# ---------------------------------------------------------------------------
# str_replace (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolStrReplace:
    """``str_replace`` edits a file and raises ``RuntimeError`` on failure."""

    def test_str_replace_edits_file(self, tmp_path):
        """The old text is swapped for the new text and the result names the file."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("hello world")

        request = BetaMemoryTool20250818StrReplaceCommand(
            command="str_replace",
            path="/memories/f.md",
            old_str="hello",
            new_str="goodbye",
        )
        outcome = instance.str_replace(request)

        assert "f.md" in outcome
        assert target.read_text() == "goodbye world"

    def test_str_replace_raises_runtime_error_on_failure(self, tmp_path):
        """An ``old_str`` that is absent from the file raises ``RuntimeError``."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello world")
        request = BetaMemoryTool20250818StrReplaceCommand(
            command="str_replace",
            path="/memories/f.md",
            old_str="not present",
            new_str="x",
        )
        with pytest.raises(RuntimeError):
            instance.str_replace(request)


# ---------------------------------------------------------------------------
# insert (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolInsert:
    """``insert`` adds a line and raises ``RuntimeError`` on failure."""

    def test_insert_prepends_line(self, tmp_path):
        """Inserting at line 0 makes the new text the first line of the file."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("line1\nline2\n")

        request = BetaMemoryTool20250818InsertCommand(
            command="insert",
            path="/memories/f.md",
            insert_line=0,
            insert_text="prepended",
        )
        outcome = instance.insert(request)

        # The result only has to mention one of these tokens.
        assert any(token in outcome for token in ("0", "prepended", "f.md"))
        first_line = target.read_text().splitlines()[0]
        assert first_line == "prepended"

    def test_insert_raises_runtime_error_on_failure(self, tmp_path):
        """Inserting into a file that does not exist raises ``RuntimeError``."""
        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818InsertCommand(
            command="insert",
            path="/memories/missing.md",
            insert_line=0,
            insert_text="x",
        )
        with pytest.raises(RuntimeError):
            instance.insert(request)
# ---------------------------------------------------------------------------
# delete (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolDelete:
    """``delete`` removes a file and raises ``RuntimeError`` on failure."""

    def test_delete_removes_file(self, tmp_path):
        """The targeted file no longer exists after ``delete``."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("data")

        request = BetaMemoryTool20250818DeleteCommand(
            command="delete", path="/memories/f.md"
        )
        instance.delete(request)

        assert not target.exists()

    def test_delete_raises_runtime_error_on_failure(self, tmp_path):
        """Deleting a path that does not exist raises ``RuntimeError``."""
        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818DeleteCommand(
            command="delete", path="/memories/nonexistent.md"
        )
        with pytest.raises(RuntimeError):
            instance.delete(request)


# ---------------------------------------------------------------------------
# rename (SDK override)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolRename:
    """``rename`` moves a file and raises ``RuntimeError`` on failure."""

    def test_rename_moves_file(self, tmp_path):
        """The old path disappears and the new path holds the same content."""
        instance = make_tool(tmp_path)
        source = instance._memory_dir / "old.md"
        destination = instance._memory_dir / "new.md"
        source.write_text("content")

        request = BetaMemoryTool20250818RenameCommand(
            command="rename",
            old_path="/memories/old.md",
            new_path="/memories/new.md",
        )
        instance.rename(request)

        assert not source.exists()
        assert destination.read_text() == "content"

    def test_rename_raises_runtime_error_on_failure(self, tmp_path):
        """Renaming a source that does not exist raises ``RuntimeError``."""
        instance = make_tool(tmp_path)
        request = BetaMemoryTool20250818RenameCommand(
            command="rename",
            old_path="/memories/missing.md",
            new_path="/memories/new.md",
        )
        with pytest.raises(RuntimeError):
            instance.rename(request)


# ---------------------------------------------------------------------------
# invoke — non-memory tool calls are skipped
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestAnthropicMemoryToolInvoke:
    """``invoke`` on JSON payloads of native tool calls."""

    def test_invoke_skips_non_memory_tool_calls(self, tmp_path):
        """A ``bash`` call listed next to a ``memory`` call is skipped; the
        memory ``view`` output is still returned with return code 0."""
        import json
        from unittest.mock import Mock

        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello")

        bash_call = {"name": "bash", "id": "tu_1", "input": {"command": "ls"}}
        memory_call = {
            "name": "memory",
            "id": "tu_2",
            "input": {
                "command": "view", "path": "/memories/f.md", "view_range": None,
            },
        }
        payload = json.dumps({"native_tool_calls": [bash_call, memory_call]})

        outcome = instance.invoke(payload, parent_bot=Mock())

        assert outcome.return_code == 0
        # Only the memory call should produce output; bash should be skipped
        assert "hello" in outcome.stdout

    def test_invoke_catches_exception_from_tool_call(self, tmp_path):
        """When ``call`` raises, ``invoke`` still returns code 0 and reports
        the error text in stdout."""
        import json
        from unittest.mock import Mock, patch

        instance = make_tool(tmp_path)

        memory_call = {
            "name": "memory",
            "id": "tu_1",
            "input": {
                "command": "view", "path": "/memories/nonexistent.md", "view_range": None,
            },
        }
        payload = json.dumps({"native_tool_calls": [memory_call]})

        # Force call() to raise an exception
        with patch.object(instance, "call", side_effect=RuntimeError("boom")):
            outcome = instance.invoke(payload, parent_bot=Mock())

        assert outcome.return_code == 0
        reported = outcome.stdout
        assert "Error executing tool 'memory'" in reported
        assert "boom" in reported
"""
Unit tests for MemoryTool — file-backed memory store.

All tests use pytest's tmp_path fixture so they are isolated from the
user's real ~/.microbots/memory directory.
"""
import sys
import os
import pytest
from pathlib import Path
from unittest.mock import Mock

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../src")))

from microbots.tools.tool_definitions.memory_tool import MemoryTool
from microbots.environment.Environment import CmdReturn


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def make_tool(tmp_path: Path) -> MemoryTool:
    """Build a ``MemoryTool`` whose memory dir is ``<tmp_path>/memory``."""
    memory_root = tmp_path / "memory"
    return MemoryTool(memory_dir=str(memory_root))


# ---------------------------------------------------------------------------
# Initialisation
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolInit:
    """Constructor checks for the memory directory."""

    def test_memory_dir_is_created_on_init(self, tmp_path):
        """Constructing the tool creates the memory directory."""
        expected_dir = tmp_path / "memory"
        assert not expected_dir.exists()

        make_tool(tmp_path)

        assert expected_dir.exists()
        assert expected_dir.is_dir()

    def test_default_memory_dir_under_home(self, monkeypatch, tmp_path):
        """With no ``memory_dir`` the tool uses ``<home>/.microbots/memory``."""

        def fake_home():
            return tmp_path

        # Redirect Path.home() so the default lands inside tmp_path.
        monkeypatch.setattr(Path, "home", staticmethod(fake_home))
        instance = MemoryTool()
        assert instance._memory_dir == tmp_path / ".microbots" / "memory"


# ---------------------------------------------------------------------------
# is_invoked
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolIsInvoked:
    """``is_invoked`` recognises commands that start with ``memory``."""

    def test_returns_true_for_memory_commands(self, tmp_path):
        """``memory <subcommand> ...`` strings are recognised."""
        instance = make_tool(tmp_path)
        for text in ("memory view /memories", "memory create /memories/f.md hello"):
            assert instance.is_invoked(text) is True

    def test_returns_false_for_other_commands(self, tmp_path):
        """Shell commands and the empty string are not recognised."""
        instance = make_tool(tmp_path)
        for text in ("ls -la", "cat file.txt", ""):
            assert instance.is_invoked(text) is False

    def test_strips_leading_whitespace(self, tmp_path):
        """Leading whitespace before ``memory`` does not prevent recognition."""
        instance = make_tool(tmp_path)
        padded = " memory view /memories"
        assert instance.is_invoked(padded) is True


# ---------------------------------------------------------------------------
# Path resolution (_resolve)
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolResolve:
    """``_resolve`` maps tool paths onto the memory directory."""

    def test_resolve_memories_root(self, tmp_path):
        """``/memories`` resolves to the memory directory itself."""
        instance = make_tool(tmp_path)
        expected = instance._memory_dir.resolve()
        assert instance._resolve("/memories") == expected

    def test_resolve_memories_subpath(self, tmp_path):
        """``/memories/<name>`` resolves to that name inside the memory dir."""
        instance = make_tool(tmp_path)
        expected = (instance._memory_dir / "notes.md").resolve()
        assert instance._resolve("/memories/notes.md") == expected

    def test_resolve_rejects_path_traversal(self, tmp_path):
        """A ``..`` escape raises ``ValueError`` mentioning path traversal."""
        instance = make_tool(tmp_path)
        with pytest.raises(ValueError, match="Path traversal"):
            instance._resolve("/memories/../../etc/passwd")

    def test_resolve_rejects_non_memory_paths(self, tmp_path):
        """Absolute paths outside ``/memories`` raise ``ValueError``."""
        instance = make_tool(tmp_path)
        outside_paths = ("/workdir/file", "/home/user/file", "/tmp/file")
        for candidate in outside_paths:
            with pytest.raises(ValueError):
                instance._resolve(candidate)

    def test_resolve_bare_relative_path_treated_as_relative_to_memory_dir(self, tmp_path):
        """A bare file name is resolved relative to the memory directory."""
        instance = make_tool(tmp_path)
        expected = (instance._memory_dir / "notes.md").resolve()
        assert instance._resolve("notes.md") == expected

    def test_resolve_bare_relative_subdir_path(self, tmp_path):
        """A bare relative path with sub-directories is resolved the same way."""
        instance = make_tool(tmp_path)
        expected = (instance._memory_dir / "sub" / "dir" / "file.md").resolve()
        assert instance._resolve("sub/dir/file.md") == expected
# ---------------------------------------------------------------------------
# _view
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolView:
    """``_view`` on directories, files, line ranges and bad input."""

    def test_view_directory_lists_contents(self, tmp_path):
        """Viewing ``/memories`` lists files, and sub-directories with a trailing slash."""
        instance = make_tool(tmp_path)
        root = instance._memory_dir
        (root / "notes.md").write_text("hello")
        (root / "sub").mkdir()

        outcome = instance._view(["/memories"])

        assert outcome.return_code == 0
        listing = outcome.stdout
        assert "notes.md" in listing
        assert "sub/" in listing

    def test_view_file_returns_numbered_lines(self, tmp_path):
        """File output carries ``N:`` line-number prefixes."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("line1\nline2\nline3\n")

        outcome = instance._view(["/memories/f.md"])

        assert outcome.return_code == 0
        for fragment in ("1:", "line1", "3:"):
            assert fragment in outcome.stdout

    def test_view_file_with_line_range(self, tmp_path):
        """``--start 2 --end 4`` shows b..d and hides a / e."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n")

        argv = ["/memories/f.md", "--start", "2", "--end", "4"]
        outcome = instance._view(argv)

        assert outcome.return_code == 0
        shown = outcome.stdout
        assert "b" in shown
        assert "d" in shown
        assert "a" not in shown
        assert "e" not in shown

    def test_view_nonexistent_path_returns_error(self, tmp_path):
        """A missing path yields a non-zero return code."""
        instance = make_tool(tmp_path)

        outcome = instance._view(["/memories/nonexistent.md"])

        assert outcome.return_code != 0

    def test_view_no_args_returns_error(self, tmp_path):
        """An empty argument list yields a non-zero return code."""
        instance = make_tool(tmp_path)
        outcome = instance._view([])
        assert outcome.return_code != 0

    def test_view_unknown_flag_is_skipped(self, tmp_path):
        """An unrecognised flag does not stop the file from being shown."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello\n")
        argv = ["/memories/f.md", "--bogus", "value"]
        outcome = instance._view(argv)
        assert outcome.return_code == 0
        assert "hello" in outcome.stdout


# ---------------------------------------------------------------------------
# _create
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestMemoryToolCreate:
    """``_create`` writes, overwrites and validates its arguments."""

    def test_create_writes_file(self, tmp_path):
        """A new file is written with the given content."""
        instance = make_tool(tmp_path)

        outcome = instance._create(["/memories/notes.md", "hello world"])

        assert outcome.return_code == 0
        written = (instance._memory_dir / "notes.md").read_text()
        assert written == "hello world"

    def test_create_overwrites_existing_file(self, tmp_path):
        """Creating over an existing file replaces its content."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("old content")

        outcome = instance._create(["/memories/f.md", "new content"])

        assert outcome.return_code == 0
        assert target.read_text() == "new content"

    def test_create_creates_parent_directories(self, tmp_path):
        """Missing parent directories are created along with the file."""
        instance = make_tool(tmp_path)

        outcome = instance._create(["/memories/sub/dir/f.md", "content"])

        assert outcome.return_code == 0
        nested = instance._memory_dir / "sub" / "dir" / "f.md"
        assert nested.exists()

    def test_create_missing_args_returns_error(self, tmp_path):
        """A path without content yields a non-zero return code."""
        instance = make_tool(tmp_path)
        path_only = ["/memories/f.md"]  # missing content
        outcome = instance._create(path_only)
        assert outcome.return_code != 0


# ---------------------------------------------------------------------------
# _str_replace
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolStrReplace:
    """``_str_replace`` replaces unique text and reports every failure mode."""

    def test_str_replace_replaces_unique_text(self, tmp_path):
        """A single occurrence of ``--old`` is replaced by ``--new``."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("hello world")

        argv = ["/memories/f.md", "--old", "hello", "--new", "goodbye"]
        outcome = instance._str_replace(argv)

        assert outcome.return_code == 0
        assert target.read_text() == "goodbye world"

    def test_str_replace_fails_when_text_not_found(self, tmp_path):
        """Absent ``--old`` text fails with ``not found`` in stderr."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello world")

        argv = ["/memories/f.md", "--old", "nothere", "--new", "x"]
        outcome = instance._str_replace(argv)

        assert outcome.return_code != 0
        assert "not found" in outcome.stderr.lower()

    def test_str_replace_fails_when_text_not_unique(self, tmp_path):
        """Text that occurs twice fails and stderr mentions the count."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello hello")

        argv = ["/memories/f.md", "--old", "hello", "--new", "bye"]
        outcome = instance._str_replace(argv)

        assert outcome.return_code != 0
        assert "2" in outcome.stderr  # appears N times

    def test_str_replace_missing_flags_returns_error(self, tmp_path):
        """A path with no ``--old`` / ``--new`` flags yields an error."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("text")
        outcome = instance._str_replace(["/memories/f.md"])
        assert outcome.return_code != 0

    def test_str_replace_empty_args_returns_usage_error(self, tmp_path):
        """An empty argument list returns code 1 and the usage message."""
        instance = make_tool(tmp_path)
        outcome = instance._str_replace([])
        assert outcome.return_code == 1
        assert "Usage: memory str_replace" in outcome.stderr

    def test_str_replace_nonexistent_file_returns_error(self, tmp_path):
        """Replacing in a file that does not exist yields an error."""
        instance = make_tool(tmp_path)
        argv = ["/memories/missing.md", "--old", "a", "--new", "b"]
        outcome = instance._str_replace(argv)
        assert outcome.return_code != 0

    def test_str_replace_unknown_flag_is_skipped(self, tmp_path):
        """An unrecognised flag does not stop the replacement."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("hello world")
        argv = [
            "/memories/f.md", "--bogus", "ignored", "--old", "hello", "--new", "goodbye"
        ]
        outcome = instance._str_replace(argv)
        assert outcome.return_code == 0
        assert target.read_text() == "goodbye world"


# ---------------------------------------------------------------------------
# _insert
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolInsert:
    """``_insert`` adds a line at a given position and validates its input."""

    def test_insert_prepends_at_line_zero(self, tmp_path):
        """``--line 0`` puts the text before the existing first line."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("line1\nline2\n")

        argv = ["/memories/f.md", "--line", "0", "--text", "prepended"]
        outcome = instance._insert(argv)

        assert outcome.return_code == 0
        content = target.read_text().splitlines()
        assert content[0] == "prepended"
        assert content[1] == "line1"

    def test_insert_at_end_of_file(self, tmp_path):
        """``--line`` equal to the line count puts the text last."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("line1\nline2\n")

        argv = ["/memories/f.md", "--line", "2", "--text", "appended"]
        outcome = instance._insert(argv)

        assert outcome.return_code == 0
        content = target.read_text().splitlines()
        assert content[-1] == "appended"

    def test_insert_invalid_line_number_returns_error(self, tmp_path):
        """Line 99 in a one-line file yields an error."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("line1\n")

        argv = ["/memories/f.md", "--line", "99", "--text", "x"]
        outcome = instance._insert(argv)

        assert outcome.return_code != 0

    def test_insert_nonexistent_file_returns_error(self, tmp_path):
        """Inserting into a file that does not exist yields an error."""
        instance = make_tool(tmp_path)
        argv = ["/memories/missing.md", "--line", "0", "--text", "x"]
        outcome = instance._insert(argv)
        assert outcome.return_code != 0

    def test_insert_missing_flags_returns_error(self, tmp_path):
        """A path with no ``--line`` / ``--text`` flags yields an error."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("line1\n")
        outcome = instance._insert(["/memories/f.md"])
        assert outcome.return_code != 0

    def test_insert_empty_args_returns_usage_error(self, tmp_path):
        """An empty argument list returns code 1 and the usage message."""
        instance = make_tool(tmp_path)
        outcome = instance._insert([])
        assert outcome.return_code == 1
        assert "Usage: memory insert" in outcome.stderr

    def test_insert_unknown_flag_is_skipped(self, tmp_path):
        """An unrecognised flag does not stop the insertion."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("line1\nline2\n")
        argv = [
            "/memories/f.md", "--bogus", "ignored", "--line", "0", "--text", "prepended"
        ]
        outcome = instance._insert(argv)
        assert outcome.return_code == 0
        content = target.read_text().splitlines()
        assert content[0] == "prepended"
# ---------------------------------------------------------------------------
# _delete
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolDelete:
    """``_delete`` removes files and directories but never the memory root."""

    def test_delete_removes_file(self, tmp_path):
        """A stored file is removed."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("data")

        outcome = instance._delete(["/memories/f.md"])

        assert outcome.return_code == 0
        assert not target.exists()

    def test_delete_removes_directory(self, tmp_path):
        """A non-empty sub-directory is removed."""
        instance = make_tool(tmp_path)
        folder = instance._memory_dir / "sub"
        folder.mkdir()
        (folder / "f.md").write_text("data")

        outcome = instance._delete(["/memories/sub"])

        assert outcome.return_code == 0
        assert not folder.exists()

    def test_delete_prevents_root_deletion(self, tmp_path):
        """Every spelling of the memory root is refused."""
        instance = make_tool(tmp_path)
        root_spellings = ("/memories", "memories", "/memories/")
        for spelling in root_spellings:
            outcome = instance._delete([spelling])
            assert outcome.return_code != 0

    def test_delete_nonexistent_path_raises(self, tmp_path):
        """A missing path is reported through a non-zero return code.

        Despite the test name, no exception is expected here.
        """
        instance = make_tool(tmp_path)
        outcome = instance._delete(["/memories/nonexistent.md"])
        assert outcome.return_code != 0

    def test_delete_no_args_returns_error(self, tmp_path):
        """An empty argument list yields a non-zero return code."""
        instance = make_tool(tmp_path)
        outcome = instance._delete([])
        assert outcome.return_code != 0


# ---------------------------------------------------------------------------
# _rename
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolRename:
    """``_rename`` moves a file and refuses unsafe or incomplete requests."""

    def test_rename_moves_file(self, tmp_path):
        """The source disappears and the destination holds its content."""
        instance = make_tool(tmp_path)
        source = instance._memory_dir / "old.md"
        source.write_text("content")

        outcome = instance._rename(["/memories/old.md", "/memories/new.md"])

        assert outcome.return_code == 0
        assert not source.exists()
        moved = instance._memory_dir / "new.md"
        assert moved.read_text() == "content"

    def test_rename_nonexistent_source_returns_error(self, tmp_path):
        """A missing source yields a non-zero return code."""
        instance = make_tool(tmp_path)
        outcome = instance._rename(["/memories/missing.md", "/memories/new.md"])
        assert outcome.return_code != 0

    def test_rename_fails_if_destination_exists(self, tmp_path):
        """Renaming onto an existing file yields a non-zero return code."""
        instance = make_tool(tmp_path)
        for filename, content in (("a.md", "a"), ("b.md", "b")):
            (instance._memory_dir / filename).write_text(content)

        outcome = instance._rename(["/memories/a.md", "/memories/b.md"])

        assert outcome.return_code != 0

    def test_rename_missing_args_returns_error(self, tmp_path):
        """A single path argument yields a non-zero return code."""
        instance = make_tool(tmp_path)
        outcome = instance._rename(["/memories/a.md"])
        assert outcome.return_code != 0


# ---------------------------------------------------------------------------
# _clear
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolClear:
    """``_clear`` empties the memory directory without removing it."""

    def test_clear_removes_all_files(self, tmp_path):
        """Every stored file is gone after ``_clear``."""
        instance = make_tool(tmp_path)
        for filename, content in (("a.md", "a"), ("b.md", "b")):
            (instance._memory_dir / filename).write_text(content)

        outcome = instance._clear()

        assert outcome.return_code == 0
        leftovers = list(instance._memory_dir.iterdir())
        assert leftovers == []

    def test_clear_leaves_memory_dir_intact(self, tmp_path):
        """The memory directory itself still exists after ``_clear``."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("data")

        instance._clear()

        assert instance._memory_dir.exists()


# ---------------------------------------------------------------------------
# is_model_supported
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolIsModelSupported:
    """``is_model_supported`` accepts every model name."""

    def test_returns_true_for_any_model(self, tmp_path):
        """All sample names, including the empty string, return ``True``."""
        instance = make_tool(tmp_path)
        sample_names = ("gpt-4", "claude-3-sonnet", "ollama/llama3", "")
        for name in sample_names:
            verdict = instance.is_model_supported(name)
            assert verdict is True


# ---------------------------------------------------------------------------
# invoke — full command dispatch
# ---------------------------------------------------------------------------

@pytest.mark.unit
class TestMemoryToolInvoke:
    """``invoke`` parses a ``memory ...`` command line and dispatches it."""

    def test_invoke_view_subcommand(self, tmp_path):
        """``memory view`` returns the file content in stdout."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("hello")

        outcome = instance.invoke("memory view /memories/f.md", parent_bot=Mock())

        assert outcome.return_code == 0
        assert "hello" in outcome.stdout

    def test_invoke_create_subcommand(self, tmp_path):
        """``memory create`` with quoted content writes that content."""
        instance = make_tool(tmp_path)

        command_line = 'memory create /memories/n.md "some content"'
        outcome = instance.invoke(command_line, parent_bot=Mock())

        assert outcome.return_code == 0
        written = (instance._memory_dir / "n.md").read_text()
        assert written == "some content"

    def test_invoke_clear_subcommand(self, tmp_path):
        """``memory clear`` empties the memory directory."""
        instance = make_tool(tmp_path)
        (instance._memory_dir / "f.md").write_text("data")

        outcome = instance.invoke("memory clear", parent_bot=Mock())

        assert outcome.return_code == 0
        leftovers = list(instance._memory_dir.iterdir())
        assert leftovers == []

    def test_invoke_unknown_subcommand_returns_error(self, tmp_path):
        """An unknown subcommand fails with ``Unknown subcommand`` in stderr."""
        instance = make_tool(tmp_path)
        outcome = instance.invoke("memory frobnicate /memories/f.md", parent_bot=Mock())
        assert outcome.return_code != 0
        assert "Unknown subcommand" in outcome.stderr

    def test_invoke_too_few_tokens_returns_error(self, tmp_path):
        """``memory`` on its own yields a non-zero return code."""
        instance = make_tool(tmp_path)
        outcome = instance.invoke("memory", parent_bot=Mock())
        assert outcome.return_code != 0

    def test_invoke_handles_bad_quoting_gracefully(self, tmp_path):
        """An unclosed quote fails with ``Parse error`` in stderr."""
        instance = make_tool(tmp_path)
        command_line = 'memory create /memories/f.md "unclosed'
        outcome = instance.invoke(command_line, parent_bot=Mock())
        assert outcome.return_code != 0
        assert "Parse error" in outcome.stderr

    def test_invoke_str_replace_subcommand(self, tmp_path):
        """``memory str_replace`` edits the file in place."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("hello world")

        command_line = 'memory str_replace /memories/f.md --old "hello" --new "goodbye"'
        outcome = instance.invoke(command_line, parent_bot=Mock())

        assert outcome.return_code == 0
        assert target.read_text() == "goodbye world"

    def test_invoke_insert_subcommand(self, tmp_path):
        """``memory insert --line 0`` prepends the text."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("line1\nline2\n")

        command_line = 'memory insert /memories/f.md --line 0 --text "prepended"'
        outcome = instance.invoke(command_line, parent_bot=Mock())

        assert outcome.return_code == 0
        content = target.read_text().splitlines()
        assert content[0] == "prepended"

    def test_invoke_delete_subcommand(self, tmp_path):
        """``memory delete`` removes the file."""
        instance = make_tool(tmp_path)
        target = instance._memory_dir / "f.md"
        target.write_text("data")

        outcome = instance.invoke("memory delete /memories/f.md", parent_bot=Mock())

        assert outcome.return_code == 0
        assert not target.exists()

    def test_invoke_rename_subcommand(self, tmp_path):
        """``memory rename`` moves the file to the new name."""
        instance = make_tool(tmp_path)
        source = instance._memory_dir / "old.md"
        source.write_text("content")

        command_line = "memory rename /memories/old.md /memories/new.md"
        outcome = instance.invoke(command_line, parent_bot=Mock())

        assert outcome.return_code == 0
        assert (instance._memory_dir / "new.md").read_text() == "content"
        assert not source.exists()

    def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path):
        """A path-traversal argument is reported as a result with return
        code 1, empty stdout and non-empty stderr instead of raising."""
        instance = make_tool(tmp_path)

        # The same path makes _resolve() raise ValueError when called directly.
        command_line = "memory view /memories/../../etc/passwd"
        outcome = instance.invoke(command_line, parent_bot=Mock())

        assert outcome.return_code == 1
        assert outcome.stdout == ""
        message = outcome.stderr
        assert "traversal" in message.lower() or message != ""


if __name__ == "__main__":
    pytest.main([__file__, "-v"])