Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Think of it as a Stagehand for interactive CLIs.

```python
def test_create_web_project():
with Flow.spawn('python setup_wizard.py', cassette=cassette) as f:
with Flow.spawn('python setup_wizard.py') as f:
f.expect('Welcome to Project Setup Wizard')

f.step("Enter project name 'mywebapp' and press enter")
Expand Down Expand Up @@ -47,3 +47,7 @@ Example - CI mode (fail if cassette is missing):
```bash
RECORD_MODE=none pytest tests/test_cli.py
```

Cassettes are stored in `<project_root>/.cassettes/`:
- CLI cassettes (LLM responses): `.cassettes/cli/`
- HTTP cassettes (API recordings): `.cassettes/http/`
110 changes: 57 additions & 53 deletions src/noot/addons/spy_mode.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
Mitmproxy addon implementing mode-aware API spy.
Mitmproxy addon implementing mode-aware HTTP cassette recording/replay.

Behavior by mode (RECORD_MODE env var):
- once (default): Record if no recordings exist, replay if they do
- none: Replay only, fail if no match found (use in CI)
- all: Always re-record, overwriting existing recordings

On startup → load existing recordings from most recent file
On startup → load existing HTTP cassettes from most recent file
On request → check for match, replay if found
On response → record new interaction (when recording)
On shutdown → save recordings (when recording)
Expand Down Expand Up @@ -102,46 +102,47 @@ class SpyModeAddon:
- all: Always re-record
"""

def __init__(self, http_cassettes_dir: Path | None = None):
    """
    Configure the addon from its argument and the environment.

    Args:
        http_cassettes_dir: Directory holding HTTP cassette JSON files.
            When falsy, the MITM_HTTP_CASSETTES_DIR environment variable
            is used instead; if that is unset or empty as well, the
            directory stays None.

    The record mode is read from MITM_RECORD_MODE (lower-cased, default
    "once"). Any value other than "all" or "none" is handled as "once".
    """
    # Fall back to the environment only when no directory was passed in.
    if not http_cassettes_dir:
        env_path = os.environ.get("MITM_HTTP_CASSETTES_DIR")
        if env_path:
            http_cassettes_dir = Path(env_path)

    self.http_cassettes_dir = http_cassettes_dir

    # Read record mode from environment (default to "once")
    self.record_mode = os.environ.get("MITM_RECORD_MODE", "once").lower()

    # Existing HTTP cassettes (loaded from most recent file)
    self.existing_cassettes: list[RecordedInteraction] = []

    # New HTTP cassettes recorded during this session
    self.new_cassettes: list[RecordedInteraction] = []

    # The flag must exist before loading; the final value depends on
    # what the load finds.
    self._should_record = False
    self._load_most_recent()

    # Set recording flag based on mode and existing cassettes
    if self.record_mode == "all":
        # Always record, and discard whatever was just loaded
        self._should_record = True
        self.existing_cassettes = []
    elif self.record_mode == "none":
        # Replay only
        self._should_record = False
    else:  # "once" (default)
        # Record only when there is nothing to replay
        self._should_record = not self.existing_cassettes

def _find_most_recent_recording(self) -> Path | None:
"""Find the most recent recording file in the recordings directory."""
if not self.recordings_dir or not self.recordings_dir.exists():
def _find_most_recent_cassette(self) -> Path | None:
"""Find the most recent HTTP cassette file in the cassettes directory."""
if not self.http_cassettes_dir or not self.http_cassettes_dir.exists():
return None

# Find all JSON files in the directory
json_files = list(self.recordings_dir.glob("*.json"))
json_files = list(self.http_cassettes_dir.glob("*.json"))
if not json_files:
return None

Expand All @@ -150,80 +151,83 @@ def _find_most_recent_recording(self) -> Path | None:
return json_files[0]

def _load_most_recent(self) -> None:
    """
    Load existing HTTP cassettes from the most recent file.

    Populates ``self.existing_cassettes`` from the "interactions" list of
    the file returned by ``_find_most_recent_cassette()``. When no file is
    found the attribute is left untouched; when the file cannot be read
    or parsed it is reset to an empty list. Progress is reported with
    ``print`` in every case.
    """
    recent_file = self._find_most_recent_cassette()
    if not recent_file:
        print(
            f"[SpyMode] No existing HTTP cassettes found in "
            f"{self.http_cassettes_dir}"
        )
        return

    try:
        # JSON files are UTF-8; do not depend on the locale default.
        data = json.loads(recent_file.read_text(encoding="utf-8"))
        self.existing_cassettes = [
            RecordedInteraction.from_dict(item)
            for item in data.get("interactions", [])
        ]
    except Exception as e:
        # Deliberately broad: a damaged cassette must not stop the proxy
        # from starting, so fall back to "nothing recorded".
        print(f"[SpyMode] Error loading HTTP cassettes: {e}")
        self.existing_cassettes = []
    else:
        print(
            f"[SpyMode] Loaded {len(self.existing_cassettes)} "
            f"HTTP cassettes from {recent_file.name}"
        )

def _save_new_cassettes(self) -> None:
    """
    Save new HTTP cassettes to a timestamped file.

    Does nothing when there are no new cassettes or no cassette directory
    is configured. Otherwise creates the directory if needed and writes
    ``<YYYY-MM-DD_HH-MM-SS>.json`` containing the existing cassettes
    followed by the new ones, then prints a short summary.
    """
    if not self.new_cassettes or not self.http_cassettes_dir:
        return

    self.http_cassettes_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_file = self.http_cassettes_dir / f"{timestamp}.json"

    # Combine existing and new cassettes so the newest file is complete
    # on its own.
    all_cassettes = self.existing_cassettes + self.new_cassettes

    data = {
        "version": "1.0",
        "interactions": [r.to_dict() for r in all_cassettes],
    }

    # Explicit encoding keeps the file readable regardless of locale.
    output_file.write_text(json.dumps(data, indent=2), encoding="utf-8")
    print(
        f"[SpyMode] Saved {len(self.new_cassettes)} "
        f"new HTTP cassettes to {output_file.name}"
    )
    print(f"[SpyMode] Total HTTP cassettes: {len(all_cassettes)}")

def load(self, loader: Loader) -> None:
    """
    Called when addon is loaded.

    Registers the string option ``spy_http_cassettes_dir`` (default "")
    on the given loader.

    Args:
        loader: Object exposing ``add_option``; the option is registered
            through it.
    """
    # NOTE(review): nothing in the visible code reads this option back;
    # the directory used by the addon comes from __init__ — confirm.
    loader.add_option(
        name="spy_http_cassettes_dir",
        typespec=str,
        default="",
        help="Directory for HTTP cassettes",
    )

def request(self, flow: http.HTTPFlow) -> None:
"""
Intercept incoming request.

Check if we have a recorded response for this request.
Check if we have a cassette response for this request.
If yes, replay it. If no, let it pass through to real API.
"""
# Check existing recordings first, then new ones
all_recordings = self.existing_recordings + self.new_recordings
# Check existing cassettes first, then new ones
all_cassettes = self.existing_cassettes + self.new_cassettes

for recording in all_recordings:
if recording.matches(flow):
for cassette in all_cassettes:
if cassette.matches(flow):
url = flow.request.pretty_url
print(f"[SpyMode] REPLAY: {flow.request.method} {url}")

# Create response from recording
# Create response from cassette
flow.response = http.Response.make(
status_code=recording.response["status_code"],
content=recording.response["content"].encode("utf-8"),
headers=recording.response["headers"],
status_code=cassette.response["status_code"],
content=cassette.response["content"].encode("utf-8"),
headers=cassette.response["headers"],
)

# Mark that this was replayed (prevent recording in response())
Expand All @@ -249,14 +253,14 @@ def response(self, flow: http.HTTPFlow) -> None:

# Only record when in recording mode
if self._should_record:
recording = RecordedInteraction.from_flow(flow)
cassette = RecordedInteraction.from_flow(flow)
print(f"[SpyMode] RECORD: {flow.request.method} {flow.request.pretty_url}")
self.new_recordings.append(recording)
self.new_cassettes.append(cassette)

def done(self) -> None:
    """
    Called when mitmproxy is shutting down.

    Persists the cassettes captured in this session, but only when the
    addon was recording and actually captured something.
    """
    if self._should_record and self.new_cassettes:
        self._save_new_cassettes()


# Entry point for mitmproxy
Expand Down
66 changes: 57 additions & 9 deletions src/noot/cache.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,52 @@
"""LLM response caching for deterministic test replay."""
"""CLI cassette caching for deterministic test replay."""

import json
import os
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path

from noot.project import ProjectRootNotFoundError, find_project_root


class CassettePathError(Exception):
    """Signals that no cassette location could be resolved."""

    def __init__(self, message: str) -> None:
        # Explicit signature: exactly one human-readable message.
        super().__init__(message)


def get_cli_cassettes_dir() -> Path:
    """
    Return the directory where CLI cassettes live by default.

    Lookup order:
    1. The NOOT_CASSETTE_DIR environment variable, when set and non-empty
    2. ``.cassettes/cli`` under the root reported by find_project_root()

    Raises:
        CassettePathError: When NOOT_CASSETTE_DIR is not set and
            find_project_root() raises ProjectRootNotFoundError.
    """
    # An explicit override always wins.
    override = os.environ.get("NOOT_CASSETTE_DIR")
    if override:
        return Path(override)

    try:
        project_root = find_project_root()
    except ProjectRootNotFoundError as exc:
        # Turn the low-level failure into actionable guidance.
        message = (
            "Cannot determine cassette directory: "
            f"no .git found starting from {exc.start_dir}.\n"
            "Either:\n"
            " 1. Run from within a git repository, or\n"
            " 2. Set NOOT_CASSETTE_DIR environment variable, or\n"
            " 3. Pass an explicit cassette path: "
            "Flow.spawn(..., cassette='path/to/cassette.json')"
        )
        raise CassettePathError(message) from exc

    return project_root / ".cassettes" / "cli"


class RecordMode(Enum):
"""Recording mode for cassettes."""
Expand All @@ -17,7 +58,7 @@ class RecordMode(Enum):

@dataclass
class CacheEntry:
"""A single cached LLM response."""
"""A single cached LLM response in a CLI cassette."""

instruction: str
screen: str
Expand All @@ -29,9 +70,9 @@ class CacheEntry:
@dataclass
class Cache:
"""
LLM response cache for deterministic replay.
CLI cassette cache for deterministic replay.

Caches are keyed by instruction only. For expect() calls, assertion code
CLI cassettes are keyed by instruction only. For expect() calls, assertion code
is stored and replayed deterministically without screen comparison.
"""

Expand All @@ -45,6 +86,11 @@ def from_env(cls, cassette_path: Path | None = None) -> "Cache":
"""
Create cache based on RECORD_MODE environment variable.

Args:
cassette_path: Path to CLI cassette file. If not specified and
mode is record/replay, uses default directory based on
.git location.

Values:
- "once" (default): Record if cassette missing, replay if exists
- "none": Replay only, fail if request not found (use in CI)
Expand All @@ -58,6 +104,10 @@ def from_env(cls, cassette_path: Path | None = None) -> "Cache":
else:
mode = RecordMode.ONCE

# Determine cassette path - always set a default path
if cassette_path is None:
cassette_path = get_cli_cassettes_dir() / "default.json"

cache = cls(mode=mode, path=cassette_path)

# Determine behavior based on mode and cassette existence
Expand All @@ -84,7 +134,7 @@ def from_env(cls, cassette_path: Path | None = None) -> "Cache":
return cache

def _load(self) -> None:
"""Load cache entries from file."""
"""Load CLI cassette entries from file."""
if not self.path or not self.path.exists():
return
data = json.loads(self.path.read_text())
Expand All @@ -100,7 +150,7 @@ def _load(self) -> None:
]

def save(self) -> None:
"""Save cache entries to file."""
"""Save CLI cassette entries to file."""
if not self.path:
return
self.path.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -118,9 +168,7 @@ def save(self) -> None:
data = {"entries": entries}
self.path.write_text(json.dumps(data, indent=2))

def get(
self, instruction: str, screen: str, method: str
) -> str | None:
def get(self, instruction: str, screen: str, method: str) -> str | None:
"""
Look up a cached response.

Expand Down
Loading