430 changes: 430 additions & 0 deletions .cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions .cursorrules
@@ -74,7 +74,6 @@ The codebase is fully async-native. Violating these rules will break the server.
 #### 3. Agent Execution Flow
 - `step()` method is the main agent execution loop (like LangChain's AgentExecutor)
 - `inner_step()` handles single LLM interactions with tool calls
-- `save_agent()` persists agent state to database
 - Steps are logged to the `steps` table for audit/analytics (write-only)
 
 #### 4. Message Flow
@@ -117,9 +116,8 @@ Before suggesting changes, verify:
 2. **Do NOT** call `step_manager.get_step()` - steps are write-only audit logs
 3. **Do NOT** bypass `create_or_get_user()` - always ensure users exist first
 4. **Do NOT** create agents without proper `CreateAgent` schema objects
-5. **Do NOT** forget to persist agent state with `save_agent()`
-6. **Do NOT** use `message.step` relationship - it's never loaded in practice
-7. **Do NOT** add duplicate environment variables in settings.py
+5. **Do NOT** use `message.step` relationship - it's never loaded in practice
+6. **Do NOT** add duplicate environment variables in settings.py
 
 ### Testing Guidelines
 - Tests located in `tests/` directory
23 changes: 18 additions & 5 deletions CLAUDE.md
@@ -40,6 +40,20 @@ python scripts/start_server.py --port 8531
 
 ## Running Tests
 
+The preferred way to run tests is via the dockerized test script, which handles infrastructure automatically:
+
+```bash
+# Full suite with verbose output (preferred)
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO
+
+# Pass any pytest args after the flags
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO -k test_message_handling
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO -m "not integration"
+```
+
+**Required env var for tests**: `GEMINI_API_KEY`
+
+### Running without Docker (manual infra)
 ```bash
 # Fast unit tests — no running server needed (~20s)
 pytest tests/test_memory_server.py -v
@@ -50,13 +64,8 @@ pytest -m "not integration" -v
 # Integration tests — requires server on port 8899
 python scripts/start_server.py --port 8899 # Terminal 1
 pytest tests/test_memory_integration.py -v -m integration -s # Terminal 2
-
-# Full suite
-pytest -v
 ```
 
-**Required env var for tests**: `GEMINI_API_KEY`
-
 ## Common Dev Tasks
 
 ### Add a new API endpoint
@@ -74,6 +83,10 @@
 
 ### Format & lint
 ```bash
+# Preferred (poetry)
+poetry run black . && poetry run isort .
+
+# Alternatively via make
 make format # ruff import sort + format
 make lint # ruff check + pyright
 make check # format + lint + test
4 changes: 2 additions & 2 deletions docs/Mirix_async_native_changes.md
@@ -99,8 +99,8 @@ Bedrock) use their respective async SDK classes. Streaming responses are
 `asyncio.sleep()`.
 
 **Agent execution** (`mirix/agent/agent.py`)
-`step()`, `inner_step()`, `_get_ai_reply()`, `_handle_ai_response()`,
-`execute_tool_and_persist_state()`, and `save_agent()` are all async.
+`step()`, `inner_step()`, `_get_ai_reply()`, and `_handle_ai_response()`
+are all async.
 Built-in tools (core, memory, extras) are async. User-defined tools
 execute in `ToolExecutionSandbox` via `asyncio.create_subprocess_exec()`
 (no thread pool).
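The subprocess-based tool execution described above can be sketched roughly as follows. This is a minimal, self-contained illustration using only the standard library; `run_tool` and its timeout handling are assumptions for the example, not Mirix's actual `ToolExecutionSandbox` API:

```python
import asyncio
import sys


async def run_tool(code: str, timeout: float = 10.0) -> str:
    """Run tool code in a child process without blocking the event loop."""
    # Spawn a fresh interpreter; no thread pool is involved.
    proc = await asyncio.create_subprocess_exec(
        sys.executable, "-c", code,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        proc.kill()  # reap a runaway tool rather than hanging the agent
        raise
    if proc.returncode != 0:
        raise RuntimeError(err.decode())
    return out.decode().strip()


if __name__ == "__main__":
    print(asyncio.run(run_tool("print(2 + 2)")))  # prints 4
```

Because the child is awaited rather than joined in a thread, many tool executions can run concurrently under the same event loop.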
4 changes: 2 additions & 2 deletions docs/architecture.html
@@ -1602,7 +1602,7 @@ <h3>In-Context Messages Per Agent</h3>
 window sent to
 the LLM on each call.
 Messages are stored in the database and loaded via
-<code>agent_manager.get_in_context_messages()</code>.
+<code>message_manager.get_messages_for_agent_user()</code>.
 </div>
 <div class="code-block" style="margin-top: 15px;">
 <pre><span class="comment"># Message structure for each agent</span>
@@ -1798,7 +1798,7 @@ <h3 style="color: #4ec9b0; margin: 30px 0 20px;">Step Execution Flow</h3>
 <strong>inner_step()</strong>
 <span style="color: #858585; margin-left: 10px;">Single async LLM call</span>
 <div class="code-block" style="margin-top: 10px;">
-<pre>in_context_messages = <span class="keyword">await</span> self.agent_manager.<span class="function">get_in_context_messages</span>(...)
+<pre>in_context_messages = <span class="keyword">await</span> self.message_manager.<span class="function">get_messages_for_agent_user</span>(...)
 complete_prompt = <span class="keyword">await</span> self.<span class="function">build_system_prompt_with_memories</span>(raw_system)
 response = <span class="keyword">await</span> self.<span class="function">_get_ai_reply</span>(input_message_sequence)
 messages, continue_chaining, failed = <span class="keyword">await</span> self.<span class="function">_handle_ai_response</span>(response)</pre>
3 changes: 1 addition & 2 deletions mirix/agent/__init__.py
@@ -23,7 +23,6 @@
     "app_utils",
     "Agent",
     "AgentState",
-    "save_agent",
     "BackgroundAgent",
     "CoreMemoryAgent",
     "EpisodicMemoryAgent",
@@ -35,7 +34,7 @@
     "SemanticMemoryAgent",
 ]
 
-from mirix.agent.agent import Agent, AgentState, save_agent
+from mirix.agent.agent import Agent, AgentState
 from mirix.agent.background_agent import BackgroundAgent
 from mirix.agent.core_memory_agent import CoreMemoryAgent
 from mirix.agent.episodic_memory_agent import EpisodicMemoryAgent