diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml index 08341d0..ae10709 100644 --- a/.github/workflows/dockerBuildPush.yml +++ b/.github/workflows/dockerBuildPush.yml @@ -1,6 +1,7 @@ name: Build and Push to Docker Hub on: + workflow_dispatch: push: paths: - 'src/microbots/environment/local_docker/image_builder/Dockerfile' diff --git a/src/microbots/tools/tool_definitions/__init__.py b/src/microbots/tools/tool_definitions/__init__.py new file mode 100644 index 0000000..1aa24ce --- /dev/null +++ b/src/microbots/tools/tool_definitions/__init__.py @@ -0,0 +1 @@ +from microbots.tools.tool_definitions.memory_tool import MemoryTool diff --git a/src/microbots/tools/tool_definitions/memory_tool.py b/src/microbots/tools/tool_definitions/memory_tool.py new file mode 100644 index 0000000..0dd5ec2 --- /dev/null +++ b/src/microbots/tools/tool_definitions/memory_tool.py @@ -0,0 +1,311 @@ +import argparse +import logging +import os +import shlex +import shutil +from pathlib import Path +from typing import Optional + +from pydantic.dataclasses import dataclass, Field + +from microbots.environment.Environment import CmdReturn +from microbots.tools.external_tool import ExternalTool + +logger = logging.getLogger(" 🧠 MemoryTool") + + +class _NoExitArgumentParser(argparse.ArgumentParser): + """ArgumentParser that raises ``ValueError`` instead of calling ``sys.exit``.""" + + def error(self, message: str) -> None: # type: ignore[override] + raise ValueError(message) + +INSTRUCTIONS_TO_LLM = """ +Use this tool to persist information to files across steps. +All paths must be under /memories/. + +MEMORY PROTOCOL: +1. ALWAYS run `memory view /memories` BEFORE doing anything else to check for + earlier progress. +2. Record status, findings and intermediate results as you go. +3. Before completing a task, save your final results to memory. +4. Keep the memory folder organised — rename or delete stale files. + +## Commands + +View a file or list a directory: + memory view + memory view --start --end + +Create a file: + memory create + +Replace a unique string in a file: + memory str_replace --old "" --new "" + +Insert a line into a file (0 = prepend): + memory insert --line --text "" + +Delete a file or directory: + memory delete + +Rename / move a file: + memory rename + +Clear all memory: + memory clear + +## Examples + + memory view /memories + memory create /memories/progress.md "## Progress\\n- Found bug in src/foo.py line 42" + memory str_replace /memories/progress.md --old "line 42" --new "line 45" + memory insert /memories/progress.md --line 0 --text "# Task Notes" + memory view /memories/progress.md --start 1 --end 10 + memory delete /memories/old_notes.md + memory rename /memories/draft.md /memories/final.md + +## Notes +- Paths must start with /memories/. +- memory create overwrites if the file already exists. +- memory str_replace requires the old text to appear exactly once. +- In memory view, use --end -1 to read through the end of the file. +""" + + +@dataclass +class MemoryTool(ExternalTool): + """ + File-backed memory tool that dispatches through the text command loop and + works consistently across providers. + + Subclass of ``ExternalTool`` — all command lists are empty so + ``install_tool``, ``setup_tool``, ``verify_tool_installation``, and + ``uninstall_tool`` are all effective no-ops inherited from ``ExternalTool``. + + All files are stored under ``memory_dir`` on the host (default + ``~/.microbots/memory``). The LLM uses paths like ``/memories/notes.md`` + which are resolved relative to ``memory_dir``. + """ + + name: str = Field(default="memory") + description: str = Field( + default="File-backed memory store — view, create, edit, delete files under /memories/." + ) + usage_instructions_to_llm: str = Field(default=INSTRUCTIONS_TO_LLM) + memory_dir: Optional[str] = Field(default=None) + + def __post_init__(self): + base = Path(self.memory_dir) if self.memory_dir else Path.home() / ".microbots" / "memory" + self._memory_dir = base + self._memory_dir.mkdir(parents=True, exist_ok=True) + self._parser = self._build_parser() + + def _build_parser(self) -> _NoExitArgumentParser: + """Build the argparse parser with subparsers for each memory subcommand.""" + parser = _NoExitArgumentParser(prog="memory", add_help=False) + subs = parser.add_subparsers(dest="subcommand") + + p_view = subs.add_parser("view", add_help=False) + p_view.add_argument("path") + p_view.add_argument("--start", type=int, default=None) + p_view.add_argument("--end", type=int, default=None) + + p_create = subs.add_parser("create", add_help=False) + p_create.add_argument("path") + p_create.add_argument("content", nargs=argparse.REMAINDER) + + p_str = subs.add_parser("str_replace", add_help=False) + p_str.add_argument("path") + p_str.add_argument("--old", required=True) + p_str.add_argument("--new", required=True) + + p_ins = subs.add_parser("insert", add_help=False) + p_ins.add_argument("path") + p_ins.add_argument("--line", type=int, required=True) + p_ins.add_argument("--text", required=True) + + p_del = subs.add_parser("delete", add_help=False) + p_del.add_argument("path") + + p_ren = subs.add_parser("rename", add_help=False) + p_ren.add_argument("old_path") + p_ren.add_argument("new_path") + + subs.add_parser("clear", add_help=False) + + return parser + + # ---------------------------------------------------------------------- # + # Path helpers + # ---------------------------------------------------------------------- # + + def _resolve(self, path: str) -> Path: + """Resolve a /memories/… path to an absolute host path.""" + if not path.startswith("/"): + raise ValueError( + f"Invalid memory path: {path!r}. Paths must start with /memories/." + ) + + stripped = path.lstrip("/") + + # Reject any path containing '..' components before resolving + if ".." in Path(stripped).parts: + raise ValueError(f"Path traversal not allowed: {path!r}") + + if stripped != "memories" and not stripped.startswith("memories/"): + raise ValueError( + f"Invalid memory path: {path!r}. Paths must start with /memories/." + ) + + if stripped == "memories": + rel = "" + else: + rel = stripped[len("memories/"):] + + resolved = (self._memory_dir / rel).resolve() if rel else self._memory_dir.resolve() + # Use trailing separator to prevent prefix confusion with sibling dirs + memory_root = str(self._memory_dir.resolve()) + if resolved != self._memory_dir.resolve() and not str(resolved).startswith(memory_root + os.sep): + raise ValueError(f"Path traversal not allowed: {path!r}") + return resolved + + # ---------------------------------------------------------------------- # + # ToolAbstract interface + # ---------------------------------------------------------------------- # + + def is_invoked(self, command: str) -> bool: + cmd = command.strip() + return cmd == "memory" or cmd.startswith("memory ") + + def invoke(self, command: str, parent_bot) -> CmdReturn: + try: + tokens = shlex.split(command) + except ValueError as exc: + return CmdReturn(stdout="", stderr=f"Parse error: {exc}", return_code=1) + + try: + args = self._parser.parse_args(tokens[1:]) # skip "memory" + except ValueError as exc: + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + if args.subcommand is None: + return CmdReturn(stdout="", stderr="Usage: memory ...", return_code=1) + + dispatch = { + "view": self._view, + "create": self._create, + "str_replace": self._str_replace, + "insert": self._insert, + "delete": self._delete, + "rename": self._rename, + } + + try: + if args.subcommand == "clear": + return self._clear() + return dispatch[args.subcommand](args) + except (OSError, ValueError, RuntimeError, UnicodeDecodeError) as exc: + logger.error("🧠 MemoryTool error: %s", exc) + return CmdReturn(stdout="", stderr=str(exc), return_code=1) + + # ---------------------------------------------------------------------- # + # Subcommand handlers + # ---------------------------------------------------------------------- # + + def _view(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) + if not resolved.exists(): + return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1) + + if resolved.is_dir(): + items = [ + (f"{item.name}/" if item.is_dir() else item.name) + for item in sorted(resolved.iterdir()) + if not item.name.startswith(".") + ] + result = f"Directory: {args.path}\n" + "\n".join(f"- {i}" for i in items) + return CmdReturn(stdout=result, stderr="", return_code=0) + + lines = resolved.read_text(encoding="utf-8").splitlines() + if args.start is not None or args.end is not None: + s = max(0, (args.start or 1) - 1) + e = len(lines) if (args.end is None or args.end == -1) else args.end + lines = lines[s:e] + base_num = s + 1 + else: + base_num = 1 + numbered = "\n".join(f"{i + base_num:4d}: {line}" for i, line in enumerate(lines)) + return CmdReturn(stdout=numbered, stderr="", return_code=0) + + def _create(self, args: argparse.Namespace) -> CmdReturn: + if not args.content: + return CmdReturn(stdout="", stderr="Usage: memory create ", return_code=1) + content = " ".join(args.content) + resolved = self._resolve(args.path) + resolved.parent.mkdir(parents=True, exist_ok=True) + resolved.write_text(content, encoding="utf-8") + logger.info("🧠 Memory file created: %s", args.path) + return CmdReturn(stdout=f"File created: {args.path}", stderr="", return_code=0) + + def _str_replace(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) + if not resolved.is_file(): + return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1) + content = resolved.read_text(encoding="utf-8") + count = content.count(args.old) + if count == 0: + return CmdReturn(stdout="", stderr=f"Text not found in {args.path!r}", return_code=1) + if count > 1: + return CmdReturn(stdout="", stderr=f"Text appears {count} times in {args.path!r} - must be unique", return_code=1) + resolved.write_text(content.replace(args.old, args.new, 1), encoding="utf-8") + return CmdReturn(stdout=f"File {args.path} has been edited.", stderr="", return_code=0) + + def _insert(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) + if not resolved.is_file(): + return CmdReturn(stdout="", stderr=f"File not found: {args.path!r}", return_code=1) + file_lines = resolved.read_text(encoding="utf-8").splitlines() + if args.line < 0 or args.line > len(file_lines): + return CmdReturn(stdout="", stderr=f"Invalid line number {args.line}. Must be 0 - {len(file_lines)}.", return_code=1) + file_lines.insert(args.line, args.text.rstrip("\n")) + resolved.write_text("\n".join(file_lines) + "\n", encoding="utf-8") + return CmdReturn(stdout=f"Text inserted at line {args.line} in {args.path}.", stderr="", return_code=0) + + def _delete(self, args: argparse.Namespace) -> CmdReturn: + resolved = self._resolve(args.path) + if resolved == self._memory_dir.resolve(): + return CmdReturn(stdout="", stderr="Cannot delete the /memories root directory", return_code=1) + if resolved.is_file(): + resolved.unlink() + logger.info("🧠 Memory file deleted: %s", args.path) + return CmdReturn(stdout=f"Deleted: {args.path}", stderr="", return_code=0) + if resolved.is_dir(): + shutil.rmtree(resolved) + logger.info("🧠 Memory directory deleted: %s", args.path) + return CmdReturn(stdout=f"Deleted directory: {args.path}", stderr="", return_code=0) + return CmdReturn(stdout="", stderr=f"Path not found: {args.path!r}", return_code=1) + + def _rename(self, args: argparse.Namespace) -> CmdReturn: + old_resolved = self._resolve(args.old_path) + new_resolved = self._resolve(args.new_path) + memory_root = self._memory_dir.resolve() + if old_resolved == memory_root: + return CmdReturn(stdout="", stderr="Cannot rename the /memories root directory", return_code=1) + if new_resolved == memory_root: + return CmdReturn(stdout="", stderr="Cannot overwrite the /memories root directory", return_code=1) + if not old_resolved.exists(): + return CmdReturn(stdout="", stderr=f"Source not found: {args.old_path!r}", return_code=1) + if new_resolved.exists(): + return CmdReturn(stdout="", stderr=f"Destination already exists: {args.new_path!r}", return_code=1) + new_resolved.parent.mkdir(parents=True, exist_ok=True) + old_resolved.rename(new_resolved) + logger.info("🧠 Memory renamed: %s → %s", args.old_path, args.new_path) + return CmdReturn(stdout=f"Renamed {args.old_path} to {args.new_path}.", stderr="", return_code=0) + + def _clear(self) -> CmdReturn: + if self._memory_dir.exists(): + shutil.rmtree(self._memory_dir) + self._memory_dir.mkdir(parents=True, exist_ok=True) + logger.info("🧠 Memory cleared.") + return CmdReturn(stdout="Memory cleared.", stderr="", return_code=0) diff --git a/test/bot/test_memory_tool_integration.py b/test/bot/test_memory_tool_integration.py new file mode 100644 index 0000000..3c03df5 --- /dev/null +++ b/test/bot/test_memory_tool_integration.py @@ -0,0 +1,485 @@ +import json +import os +from unittest.mock import Mock, patch + +import pytest + +from microbots import MicroBot, BotRunResult +from microbots.constants import DOCKER_WORKING_DIR, PermissionLabels +from microbots.extras.mount import Mount +from microbots.tools.tool_definitions.memory_tool import MemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_anthropic_response(task_done, thoughts, command=""): + """Build a mock Anthropic API response.""" + payload = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + + text_block = Mock() + text_block.type = "text" + text_block.text = payload + + resp = Mock() + resp.stop_reason = "end_turn" + resp.content = [text_block] + return resp + + +def _make_openai_response(task_done, thoughts, command=""): + """Build a mock OpenAI API response with output_text.""" + resp = Mock() + resp.output_text = json.dumps({ + "task_done": task_done, + "thoughts": thoughts, + "command": command, + }) + return resp + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolWiring: + """Unit tests for MemoryTool dispatch with a mocked Anthropic client.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + """Create a MicroBot with a mocked Anthropic provider and a MemoryTool. + + The LLM client is mocked, but the rest of the stack is real: + tool dispatch and memory file operations. + """ + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + anthropic_deployment = "claude-sonnet-4-5" + + with patch("microbots.llm.anthropic_api.Anthropic") as mock_anthropic_cls, \ + patch("microbots.llm.anthropic_api.api_key", "test-key"), \ + patch("microbots.llm.anthropic_api.endpoint", "https://api.anthropic.com"), \ + patch("microbots.llm.anthropic_api.deployment_name", anthropic_deployment): + + bot = MicroBot( + model=f"anthropic/{anthropic_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_anthropic_cls.return_value + yield bot + del bot + + # -- Upgrade verification ----------------------------------------------- + + def test_memory_tool_is_retained(self, bot): + """MemoryTool passed to MicroBot should remain provider-agnostic.""" + memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)] + assert len(memory_tools) == 1, "Expected exactly one MemoryTool to remain attached" + + # -- Create file via text command --------------------------------------- + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + # Sequence: + # 1. ask(task) → API returns JSON command (memory create) + # 2. ask(command output) → API returns end_turn (task_done=True) + self._mock_client.messages.create.side_effect = [ + _make_anthropic_response( + task_done=False, + thoughts="I'll save a note to memory.", + command='memory create /memories/notes.md "Hello from integration test"', + ), + _make_anthropic_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from integration test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # Verify the file was actually created on disk + # _resolve("/memories/notes.md") strips the "memories/" prefix → memory_dir/notes.md + created_file = memory_dir / "notes.md" + assert created_file.exists(), f"Expected {created_file} to be created" + assert created_file.read_text() == "Hello from integration test" + + # -- View file via text command ----------------------------------------- + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + # Pre-create a file in memory + # _resolve("/memories/existing.md") → memory_dir/existing.md + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.messages.create.side_effect = [ + _make_anthropic_response( + task_done=False, + thoughts="Let me check my memory.", + command="memory view /memories/existing.md", + ), + _make_anthropic_response( + task_done=True, + thoughts="Found previously saved content in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + # Verify the view result was passed back to the API as the next user message + calls = self._mock_client.messages.create.call_args_list + assert len(calls) == 2 + # The second call should have messages including the file content + second_call_messages = calls[1].kwargs.get("messages") or calls[1][1].get("messages", []) + user_messages = [ + m for m in second_call_messages + if m.get("role") == "user" and isinstance(m.get("content"), str) + ] + assert len(user_messages) >= 2, "Expected the command output to be sent as a user message" + assert "Previously saved content" in user_messages[-1]["content"] + + # -- Multiple tool calls in sequence ------------------------------------ + + def test_create_then_view_memory_file(self, bot, memory_dir): + """LLM creates a file, then views it — both dispatched via MicroBot loop.""" + self._mock_client.messages.create.side_effect = [ + # Step 1: create file + _make_anthropic_response( + task_done=False, + thoughts="Creating a todo list.", + command='memory create /memories/todo.md "- Fix bug #42\n- Write tests"', + ), + # Step 2: view file + _make_anthropic_response( + task_done=False, + thoughts="Let me verify what I wrote.", + command="memory view /memories/todo.md", + ), + # Step 3: done + _make_anthropic_response( + task_done=True, + thoughts="Created and verified the todo list.", + ), + ] + + result: BotRunResult = bot.run( + "Create a todo list and verify it was saved", + max_iterations=10, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + # File should exist with correct content + created_file = memory_dir / "todo.md" + assert created_file.exists() + assert "Fix bug #42" in created_file.read_text() + + # -- Non-memory commands still go to environment ------------------------ + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment, not the memory tool.""" + self._mock_client.messages.create.side_effect = [ + _make_anthropic_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_anthropic_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + # The environment.execute should have been called with "ls -la" + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# OpenAI wiring tests (mocked) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolWiringOpenAI: + """Unit tests for MemoryTool dispatch with a mocked OpenAI client.""" + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def bot(self, memory_dir): + tool = MemoryTool( + memory_dir=str(memory_dir), + usage_instructions_to_llm="Use the memory tool to persist notes.", + ) + + mock_env = Mock() + mock_env.execute.return_value = Mock(return_code=0, stdout="", stderr="") + + openai_deployment = "gpt-4" + + with patch("microbots.llm.openai_api.OpenAI") as mock_openai_cls, \ + patch("microbots.llm.openai_api.api_key", "test-key"), \ + patch("microbots.llm.openai_api.endpoint", "https://api.openai.com"), \ + patch("microbots.llm.openai_api.deployment_name", openai_deployment): + + bot = MicroBot( + model=f"azure-openai/{openai_deployment}", + system_prompt="You are a helpful assistant.", + additional_tools=[tool], + environment=mock_env, + ) + + self._mock_client = mock_openai_cls.return_value + yield bot + del bot + + def test_memory_tool_is_retained(self, bot): + memory_tools = [t for t in bot.additional_tools if isinstance(t, MemoryTool)] + assert len(memory_tools) == 1 + + def test_create_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory create → MicroBot dispatches → file appears on disk.""" + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="I'll save a note to memory.", + command='memory create /memories/notes.md "Hello from OpenAI test"', + ), + _make_openai_response( + task_done=True, + thoughts="Saved a note to memory successfully.", + ), + ] + + result: BotRunResult = bot.run( + "Save a note saying 'Hello from OpenAI test'", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + assert result.error is None + + created_file = memory_dir / "notes.md" + assert created_file.exists() + assert created_file.read_text() == "Hello from OpenAI test" + + def test_view_memory_file_via_tool_dispatch(self, bot, memory_dir): + """LLM requests a memory view → MicroBot dispatches → file content returned.""" + (memory_dir / "existing.md").write_text("Previously saved content") + + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="Let me check my memory.", + command="memory view /memories/existing.md", + ), + _make_openai_response( + task_done=True, + thoughts="Found previously saved content in memory.", + ), + ] + + result: BotRunResult = bot.run( + "Check your memory for existing notes", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + + def test_non_memory_commands_go_to_environment(self, bot): + """Regular shell commands should be dispatched to the environment.""" + self._mock_client.responses.create.side_effect = [ + _make_openai_response( + task_done=False, + thoughts="Let me check the files.", + command="ls -la", + ), + _make_openai_response( + task_done=True, + thoughts="Done.", + ), + ] + + result: BotRunResult = bot.run( + "List the files", + max_iterations=5, + timeout_in_seconds=30, + ) + + assert result.status is True + bot.environment.execute.assert_called_with("ls -la") + + +# --------------------------------------------------------------------------- +# Real integration tests — require Azure OpenAI + Docker +# --------------------------------------------------------------------------- + +from microbots.llm.llm import llm_output_format_str + +def _memory_system_prompt(sandbox_path): + return f"""You are a helpful assistant with access to a shell environment. +The project code is available at {sandbox_path} in your shell environment. +All your responses must be in this JSON format: +{llm_output_format_str} +The properties (task_done, thoughts, command) are mandatory on each response. +When you are done, set task_done to true and command to an empty string. +""" + + +def _create_test_project(base_dir): + """Create a small Python project with deliberate code issues. + + Returns the path to the project directory. + """ + project = base_dir / "myproject" + project.mkdir() + + # utils.py — unused import 'os' + (project / "utils.py").write_text( + "import os\n" + "import json\n" + "\n" + "\n" + "def parse_config(raw: str) -> dict:\n" + ' """Parse a JSON config string."""\n' + " return json.loads(raw)\n" + ) + + # handler.py — missing null check around line 42 + lines = [f"# handler.py — request handler\n"] + lines += [f"# line {i}\n" for i in range(2, 42)] + lines.append("def handle_request(request):\n") # line 42 + lines.append(" user = request.get('user')\n") # line 43 + lines.append(" return user.name\n") # line 44 — will crash if user is None + (project / "handler.py").write_text("".join(lines)) + + # client.py — deprecated API call around line 88 + lines = [f"# client.py — HTTP client wrapper\n"] + lines += [f"# line {i}\n" for i in range(2, 88)] + lines.append("import urllib.request\n") # line 88 + lines.append("def fetch(url):\n") # line 89 + lines.append(" return urllib.request.urlopen(url).read() # deprecated: use requests\n") + (project / "client.py").write_text("".join(lines)) + + return project + + +@pytest.mark.integration +@pytest.mark.slow +class TestMemoryToolRealIntegration: + """End-to-end integration tests with Azure OpenAI. + + These tests exercise the full MicroBot → LLM → memory tool pipeline + with no mocking. A small Python project with deliberate code issues + is mounted into the Docker sandbox so the bot has real files to work on. + + Run with:: + + pytest -m integration test/bot/test_memory_tool_integration.py -v + + Requires ``AZURE_OPENAI_DEPLOYMENT_NAME`` (or defaults to + ``mini-swe-agent-gpt5``) and valid Azure OpenAI credentials. + """ + + @pytest.fixture() + def memory_dir(self, tmp_path): + d = tmp_path / "memory" + d.mkdir() + return d + + @pytest.fixture() + def test_project(self, tmp_path): + return _create_test_project(tmp_path) + + @pytest.fixture() + def memory_bot(self, memory_dir, test_project): + tool = MemoryTool( + memory_dir=str(memory_dir), + ) + + model = f"azure-openai/{os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'mini-swe-agent-gpt5')}" + + sandbox_path = f"{DOCKER_WORKING_DIR}/{test_project.name}" + folder_mount = Mount( + str(test_project), + sandbox_path, + PermissionLabels.READ_ONLY, + ) + + bot = MicroBot( + model=model, + system_prompt=_memory_system_prompt(sandbox_path), + additional_tools=[tool], + folder_to_mount=folder_mount, + ) + + yield bot + del bot + + def test_create_memory_file(self, memory_bot, memory_dir): + """MicroBot should use memory tool while reviewing code for issues.""" + result: BotRunResult = memory_bot.run( + task=( + "Review the Python project for code quality issues. " + "Check each .py file for unused imports, missing error handling, " + "and deprecated API usage. Report all issues you find." + ), + max_iterations=15, + timeout_in_seconds=300, + ) + + assert result.status is True, f"Task failed: {result.error}" + assert result.error is None + + saved_files = [f for f in memory_dir.rglob("*") if f.is_file()] + assert len(saved_files) >= 1, ( + f"Expected at least one file created in memory. " + f"Found: {saved_files}" + ) diff --git a/test/tools/tool_definitions/test_memory_tool.py b/test/tools/tool_definitions/test_memory_tool.py new file mode 100644 index 0000000..e47bc49 --- /dev/null +++ b/test/tools/tool_definitions/test_memory_tool.py @@ -0,0 +1,642 @@ +""" +Unit tests for MemoryTool — file-backed memory store. + +All tests use pytest's tmp_path fixture so they are isolated from the +user's real ~/.microbots/memory directory. +""" +import pytest +from argparse import Namespace +from pathlib import Path +from unittest.mock import Mock + +from microbots.tools.tool_definitions.memory_tool import MemoryTool + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_tool(tmp_path: Path) -> MemoryTool: + """Return a MemoryTool whose memory_dir lives under tmp_path.""" + return MemoryTool(memory_dir=str(tmp_path / "memory")) + + +# --------------------------------------------------------------------------- +# Initialisation +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInit: + + def test_memory_dir_is_created_on_init(self, tmp_path): + mem_dir = tmp_path / "memory" + assert not mem_dir.exists() + + make_tool(tmp_path) + + assert mem_dir.exists() + assert mem_dir.is_dir() + + def test_default_memory_dir_under_home(self, monkeypatch, tmp_path): + """When no memory_dir is given it falls back to ~/.microbots/memory.""" + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + tool = MemoryTool() + assert tool._memory_dir == tmp_path / ".microbots" / "memory" + + +# --------------------------------------------------------------------------- +# is_invoked +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsInvoked: + + def test_returns_true_for_memory_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("memory view /memories") is True + assert tool.is_invoked("memory create /memories/f.md hello") is True + + def test_returns_true_for_bare_memory(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("memory") is True + assert tool.is_invoked("memory\n") is True + assert tool.is_invoked(" memory ") is True + + def test_returns_false_for_other_commands(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked("ls -la") is False + assert tool.is_invoked("cat file.txt") is False + assert tool.is_invoked("") is False + + def test_strips_leading_whitespace(self, tmp_path): + tool = make_tool(tmp_path) + assert tool.is_invoked(" memory view /memories") is True + + +# --------------------------------------------------------------------------- +# Path resolution (_resolve) +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolResolve: + + def test_resolve_memories_root(self, tmp_path): + tool = make_tool(tmp_path) + assert tool._resolve("/memories") == tool._memory_dir.resolve() + + def test_resolve_memories_subpath(self, tmp_path): + tool = make_tool(tmp_path) + resolved = tool._resolve("/memories/notes.md") + assert resolved == (tool._memory_dir / "notes.md").resolve() + + def test_resolve_rejects_path_traversal(self, tmp_path): + tool = make_tool(tmp_path) + with pytest.raises(ValueError, match="Path traversal"): + tool._resolve("/memories/../../etc/passwd") + + def test_resolve_rejects_symlink_escaping_memory_dir(self, tmp_path): + """A symlink inside memory_dir that resolves outside must be rejected + by the startswith guard (not the '..' check).""" + tool = make_tool(tmp_path) + outside = tmp_path / "outside" + outside.mkdir() + (outside / "secret.txt").write_text("sensitive") + # Create a symlink inside the memory dir pointing outside + link = tool._memory_dir / "escape" + link.symlink_to(outside) + with pytest.raises(ValueError, match="Path traversal"): + tool._resolve("/memories/escape/secret.txt") + + def test_resolve_rejects_non_memory_paths(self, tmp_path): + tool = make_tool(tmp_path) + for bad in ("/workdir/file", "/home/user/file", "/tmp/file"): + with pytest.raises(ValueError): + tool._resolve(bad) + + def test_resolve_rejects_bare_relative_paths(self, tmp_path): + """Bare relative paths without /memories/ prefix must be rejected + to match the documented contract.""" + tool = make_tool(tmp_path) + for bad in ("notes.md", "sub/dir/file.md", "file.txt", "memories/foo.md"): + with pytest.raises(ValueError, match="Paths must start with /memories/"): + tool._resolve(bad) + + +# --------------------------------------------------------------------------- +# _view +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolView: + + def test_view_directory_lists_contents(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "notes.md").write_text("hello") + (tool._memory_dir / "sub").mkdir() + + result = tool._view(Namespace(path="/memories", start=None, end=None)) + + assert result.return_code == 0 + assert "notes.md" in result.stdout + assert "sub/" in result.stdout + + def test_view_file_returns_numbered_lines(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\nline3\n") + + result = tool._view(Namespace(path="/memories/f.md", start=None, end=None)) + + assert result.return_code == 0 + assert "1:" in result.stdout + assert "line1" in result.stdout + assert "3:" in result.stdout + + def test_view_file_with_line_range(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("a\nb\nc\nd\ne\n") + + result = tool._view(Namespace(path="/memories/f.md", start=2, end=4)) + + assert result.return_code == 0 + assert "b" in result.stdout + assert "d" in result.stdout + assert "a" not in result.stdout + assert "e" not in result.stdout + + def test_view_nonexistent_path_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._view(Namespace(path="/memories/nonexistent.md", start=None, end=None)) + + assert result.return_code != 0 + + def test_view_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory view", parent_bot=Mock()) + assert result.return_code != 0 + + def test_view_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello\n") + result = tool.invoke("memory view /memories/f.md --bogus value", parent_bot=Mock()) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _create +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolCreate: + + def test_create_writes_file(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(Namespace(path="/memories/notes.md", content=["hello world"])) + + assert result.return_code == 0 + assert (tool._memory_dir / "notes.md").read_text() == "hello world" + + def test_create_overwrites_existing_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("old content") + + result = tool._create(Namespace(path="/memories/f.md", content=["new content"])) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "new content" + + def test_create_creates_parent_directories(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool._create(Namespace(path="/memories/sub/dir/f.md", content=["content"])) + + assert result.return_code == 0 + assert (tool._memory_dir / "sub" / "dir" / "f.md").exists() + + def test_create_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory create /memories/f.md", parent_bot=Mock()) # missing content + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _str_replace +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolStrReplace: + + def test_str_replace_replaces_unique_text(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="goodbye")) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_str_replace_fails_when_text_not_found(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool._str_replace(Namespace(path="/memories/f.md", old="nothere", new="x")) + + assert result.return_code != 0 + assert "not found" in result.stderr.lower() + + def test_str_replace_fails_when_text_not_unique(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello hello") + + result = tool._str_replace(Namespace(path="/memories/f.md", old="hello", new="bye")) + + assert result.return_code != 0 + assert "2" in result.stderr # appears N times + + def test_str_replace_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("text") + result = tool.invoke("memory str_replace /memories/f.md", parent_bot=Mock()) + assert result.return_code != 0 + + def test_str_replace_empty_args_returns_usage_error(self, tmp_path): + """Calling str_replace with no args returns an error via argparse.""" + tool = make_tool(tmp_path) + result = tool.invoke("memory str_replace", parent_bot=Mock()) + assert result.return_code == 1 + + def test_str_replace_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._str_replace(Namespace(path="/memories/missing.md", old="a", new="b")) + assert result.return_code != 0 + + def test_str_replace_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + result = tool.invoke( + 'memory str_replace /memories/f.md --bogus ignored --old "hello" --new "goodbye"', + parent_bot=Mock(), + ) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _insert +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInsert: + + def test_insert_prepends_at_line_zero(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(Namespace(path="/memories/f.md", line=0, text="prepended")) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + assert lines[1] == "line1" + + def test_insert_at_end_of_file(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool._insert(Namespace(path="/memories/f.md", line=2, text="appended")) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[-1] == "appended" + + def test_insert_invalid_line_number_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + + result = tool._insert(Namespace(path="/memories/f.md", line=99, text="x")) + + assert result.return_code != 0 + + def test_insert_nonexistent_file_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._insert(Namespace(path="/memories/missing.md", line=0, text="x")) + assert result.return_code != 0 + + def test_insert_missing_flags_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\n") + result = tool.invoke("memory insert /memories/f.md", parent_bot=Mock()) + assert result.return_code != 0 + + def test_insert_empty_args_returns_usage_error(self, tmp_path): + """Calling insert with no args returns an error via argparse.""" + tool = make_tool(tmp_path) + result = tool.invoke("memory insert", parent_bot=Mock()) + assert result.return_code == 1 + + def test_insert_unknown_flag_returns_error(self, tmp_path): + """Argparse rejects unrecognised flags.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + result = tool.invoke( + 'memory insert /memories/f.md --bogus ignored --line 0 --text "prepended"', + parent_bot=Mock(), + ) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _delete +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolDelete: + + def test_delete_removes_file(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool._delete(Namespace(path="/memories/f.md")) + + assert result.return_code == 0 + assert not f.exists() + + def test_delete_removes_directory(self, tmp_path): + tool = make_tool(tmp_path) + sub = tool._memory_dir / "sub" + sub.mkdir() + (sub / "f.md").write_text("data") + + result = tool._delete(Namespace(path="/memories/sub")) + + assert result.return_code == 0 + assert not sub.exists() + + def test_delete_prevents_root_deletion(self, tmp_path): + tool = make_tool(tmp_path) + for path in ("/memories", "/memories/"): + result = tool._delete(Namespace(path=path)) + assert result.return_code != 0 + + @pytest.mark.parametrize("path", [ + "/memories/.", # dot resolves to root + "/memories/./", # trailing slash variant + "//memories", # double leading slash + "//memories/", # double leading slash + trailing slash + ]) + def test_delete_prevents_root_deletion_normalized_paths(self, tmp_path, path): + """Root-equivalent paths that bypass the simple string guard must + still be rejected so that the entire memory directory is never + wiped out via ``_delete``.""" + tool = make_tool(tmp_path) + # Seed the directory so we can verify it survives + (tool._memory_dir / "keep.md").write_text("important") + + result = tool._delete(Namespace(path=path)) + + assert result.return_code != 0, f"path {path!r} was not rejected" + assert tool._memory_dir.exists(), f"path {path!r} deleted the memory root" + assert (tool._memory_dir / "keep.md").exists(), f"path {path!r} wiped memory contents" + + def test_delete_nonexistent_path_raises(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._delete(Namespace(path="/memories/nonexistent.md")) + assert result.return_code != 0 + + def test_delete_no_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory delete", parent_bot=Mock()) + assert result.return_code != 0 + + +# --------------------------------------------------------------------------- +# _rename +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolRename: + + def test_rename_moves_file(self, tmp_path): + tool = make_tool(tmp_path) + src = tool._memory_dir / "old.md" + src.write_text("content") + + result = tool._rename(Namespace(old_path="/memories/old.md", new_path="/memories/new.md")) + + assert result.return_code == 0 + assert not src.exists() + assert (tool._memory_dir / "new.md").read_text() == "content" + + def test_rename_nonexistent_source_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool._rename(Namespace(old_path="/memories/missing.md", new_path="/memories/new.md")) + assert result.return_code != 0 + + def test_rename_fails_if_destination_exists(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._rename(Namespace(old_path="/memories/a.md", new_path="/memories/b.md")) + + assert result.return_code != 0 + + def test_rename_missing_args_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory rename /memories/a.md", parent_bot=Mock()) + assert result.return_code != 0 + + @pytest.mark.parametrize("old_path", [ + "/memories", + "/memories/", + "/memories/.", + "//memories", + ]) + def test_rename_prevents_renaming_root_as_source(self, tmp_path, old_path): + """Renaming the memory root itself must be rejected.""" + tool = make_tool(tmp_path) + (tool._memory_dir / "keep.md").write_text("important") + + result = tool._rename(Namespace(old_path=old_path, new_path="/memories/newdir")) + + assert result.return_code != 0, f"old_path {old_path!r} was not rejected" + assert tool._memory_dir.exists(), f"old_path {old_path!r} moved the memory root" + assert (tool._memory_dir / "keep.md").exists() + + @pytest.mark.parametrize("new_path", [ + "/memories", + "/memories/", + "/memories/.", + "//memories", + ]) + def test_rename_prevents_overwriting_root_as_destination(self, tmp_path, new_path): + """Renaming onto the memory root must be rejected.""" + tool = make_tool(tmp_path) + src = tool._memory_dir / "src.md" + src.write_text("data") + + result = tool._rename(Namespace(old_path="/memories/src.md", new_path=new_path)) + + assert result.return_code != 0, f"new_path {new_path!r} was not rejected" + assert src.exists() + + +# --------------------------------------------------------------------------- +# _clear +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolClear: + + def test_clear_removes_all_files(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "a.md").write_text("a") + (tool._memory_dir / "b.md").write_text("b") + + result = tool._clear() + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_clear_leaves_memory_dir_intact(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + tool._clear() + + assert tool._memory_dir.exists() + + +# --------------------------------------------------------------------------- +# is_model_supported +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolIsModelSupported: + + def test_returns_true_for_any_model(self, tmp_path): + tool = make_tool(tmp_path) + for model in ("gpt-4", "claude-3-sonnet", "ollama/llama3", ""): + assert tool.is_model_supported(model) is True + + +# --------------------------------------------------------------------------- +# invoke — full command dispatch +# --------------------------------------------------------------------------- + +@pytest.mark.unit +class TestMemoryToolInvoke: + + def test_invoke_view_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello") + + result = tool.invoke("memory view /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert "hello" in result.stdout + + def test_invoke_create_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + + result = tool.invoke('memory create /memories/n.md "some content"', parent_bot=Mock()) + + assert result.return_code == 0 + assert (tool._memory_dir / "n.md").read_text() == "some content" + + def test_invoke_clear_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("data") + + result = tool.invoke("memory clear", parent_bot=Mock()) + + assert result.return_code == 0 + assert list(tool._memory_dir.iterdir()) == [] + + def test_invoke_unknown_subcommand_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory frobnicate /memories/f.md", parent_bot=Mock()) + assert result.return_code != 0 + assert "invalid choice" in result.stderr + + def test_invoke_too_few_tokens_returns_error(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke("memory", parent_bot=Mock()) + assert result.return_code != 0 + + def test_invoke_handles_bad_quoting_gracefully(self, tmp_path): + tool = make_tool(tmp_path) + result = tool.invoke('memory create /memories/f.md "unclosed', parent_bot=Mock()) + assert result.return_code != 0 + assert "Parse error" in result.stderr + + def test_invoke_str_replace_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("hello world") + + result = tool.invoke( + 'memory str_replace /memories/f.md --old "hello" --new "goodbye"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "f.md").read_text() == "goodbye world" + + def test_invoke_insert_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "f.md").write_text("line1\nline2\n") + + result = tool.invoke( + 'memory insert /memories/f.md --line 0 --text "prepended"', + parent_bot=Mock(), + ) + + assert result.return_code == 0 + lines = (tool._memory_dir / "f.md").read_text().splitlines() + assert lines[0] == "prepended" + + def test_invoke_delete_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + f = tool._memory_dir / "f.md" + f.write_text("data") + + result = tool.invoke("memory delete /memories/f.md", parent_bot=Mock()) + + assert result.return_code == 0 + assert not f.exists() + + def test_invoke_rename_subcommand(self, tmp_path): + tool = make_tool(tmp_path) + (tool._memory_dir / "old.md").write_text("content") + + result = tool.invoke( + "memory rename /memories/old.md /memories/new.md", + parent_bot=Mock(), + ) + + assert result.return_code == 0 + assert (tool._memory_dir / "new.md").read_text() == "content" + assert not (tool._memory_dir / "old.md").exists() + + def test_invoke_exception_returned_as_error_cmdreturn(self, tmp_path): + """except (ValueError, FileNotFoundError, RuntimeError) block: + a path-traversal path causes _resolve() to raise ValueError inside a + subcommand handler, which is caught and returned as CmdReturn(return_code=1).""" + tool = make_tool(tmp_path) + + # Path traversal triggers ValueError inside _view → caught by except block + result = tool.invoke( + "memory view /memories/../../etc/passwd", + parent_bot=Mock(), + ) + + assert result.return_code == 1 + assert result.stdout == "" + assert "traversal" in result.stderr.lower() or result.stderr != "" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])