-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_agent.py
More file actions
193 lines (165 loc) · 8.96 KB
/
Copy pathtest_agent.py
File metadata and controls
193 lines (165 loc) · 8.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
"""
Test Script for Rupert's Agentic Loop
=====================================
This script simulates the ReAct loop directly to test tasks without the interactive shell.
"""
import os
import sys
import json
import time
import re
import urllib.request
import urllib.parse
# Ensure kernelweave is importable
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
from kernelweave_ollama import TOOLS, get_ollama_models
from kernelweave.kernel import KernelStore
from pathlib import Path
from kernelweave.runtime import KernelRuntime
def extract_json(text):
    """Return the first balanced ``{...}`` span in *text*, or ``None``.

    Scanning starts at the first ``{`` and stops at the ``}`` that closes it.
    Braces that appear inside double-quoted JSON strings (including strings
    containing backslash-escaped quotes) are ignored, so payloads such as
    ``{"content": "function f() { }"}`` are returned whole instead of being
    cut at the first ``}``.

    The returned substring is not validated; callers are expected to pass it
    to ``json.loads`` and handle any decoding error themselves.

    Args:
        text: Raw model output that may contain a JSON object surrounded by
            other text.

    Returns:
        The substring from the first ``{`` to its matching ``}``, or ``None``
        when there is no ``{`` or the braces never balance.
    """
    start = text.find('{')
    if start == -1:
        return None

    depth = 0
    in_string = False
    escaped = False
    for index, char in enumerate(text[start:], start=start):
        if in_string:
            # Inside a string literal only an unescaped quote matters.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return text[start:index + 1]
    return None
# Keyword each tool receives when the model supplies ``args`` as a bare
# string instead of an object. ``web_search`` uses ``query``, matching the
# example in the system prompt and the fallback lookup in ``run_test``.
_STRING_ARG_KEYS = {
    "run_command": "command",
    "read_file": "path",
    "list_dir": "path",
    "browser_browse": "url",
    "web_search": "query",
}

# Streaming stops once this many characters have been received for one reply.
_MAX_REPLY_CHARS = 4000


def _normalize_tool_args(tool_name, tool_args):
    """Wrap a bare-string ``args`` value in the dict form the tool is called with."""
    if not isinstance(tool_args, str):
        return tool_args
    key = _STRING_ARG_KEYS.get(tool_name)
    return {key: tool_args} if key else tool_args


def _stream_model_reply(url, body):
    """POST *body* to *url*, echo the streamed reply, and return ``(answer, reasoning)``.

    Answer text (the ``response`` field) and reasoning text (the
    ``reasoning_content`` / ``thinking`` fields) are both printed as they
    arrive but are accumulated separately, so reasoning never replaces or
    pollutes the text that is later parsed for a tool call.
    """
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode('utf-8'),
        headers={"content-type": "application/json"},
    )
    answer_parts = []
    reasoning_parts = []
    streamed_chars = 0
    first_token = True

    with urllib.request.urlopen(req, timeout=120) as response:
        for raw_line in response:
            line = raw_line.strip()
            if not line:
                # A whitespace-only line is not a JSON document; skip it
                # rather than letting json.loads abort the whole run.
                continue
            chunk = json.loads(line.decode('utf-8'))
            answer = chunk.get("response", "") or ""
            reasoning = chunk.get("reasoning_content", "") or chunk.get("thinking", "") or ""

            shown = reasoning + answer
            if first_token and shown.strip():
                print(" " * 20, end="\r")  # Clear "Thinking..."
                first_token = False
            print(shown, end="", flush=True)

            if answer:
                answer_parts.append(answer)
            if reasoning:
                reasoning_parts.append(reasoning)
            streamed_chars += len(shown)

            if streamed_chars > _MAX_REPLY_CHARS:
                print(f"\n\033[93m[Warning] Model output too long (>{_MAX_REPLY_CHARS} chars), truncating...\033[0m")
                break

    return "".join(answer_parts), "".join(reasoning_parts)


def run_test(prompt, model_name="granite4.1:8b"):
    """Drive one prompt through the tool-calling loop without the interactive shell.

    The prompt is first passed to ``KernelRuntime.run`` and the reported mode
    and kernel id are printed. The function then repeatedly asks the Ollama
    ``/api/generate`` endpoint on 127.0.0.1:11434 for a JSON tool call, runs
    the named entry of ``TOOLS`` with the supplied arguments, and feeds the
    result back as an observation. The loop ends when the reply contains no
    tool call, after 40 iterations, or when an unexpected error occurs.

    Args:
        prompt: The task text given to the model as the final user turn.
        model_name: Ollama model identifier sent in the request body.

    Returns:
        None. Progress, tool results and errors are printed to stdout.
    """
    print(f"Starting test with prompt: '{prompt}' using model: {model_name}")

    # Initialize
    store = KernelStore(Path("store"))
    runtime = KernelRuntime(store, use_embeddings=True)

    # Routing
    plan = runtime.run(prompt)
    print(f"[Router] Mode: {plan['mode']} | Kernel: {plan.get('kernel_id', 'none')}")

    system_prompt = (
        "You are Rupert, an advanced autonomous AI operating system running on Windows.\n"
        "You must use tools by outputting a JSON object. You MUST include a 'thought' field for reasoning and a 'plan' list for multi-step tasks. For example:\n"
        "{\n"
        " \"thought\": \"I need to search the web to find the latest news.\",\n"
        " \"plan\": [\"Search the web\", \"Read articles\", \"Summarize\"],\n"
        " \"tool\": \"web_search\",\n"
        " \"args\": {\"query\": \"latest news\"}\n"
        "}\n"
        "Available tools: `browser_browse`, `web_search`, `read_file`, `write_file`, `list_dir`, `run_command`.\n"
        "CRITICAL RULES:\n"
        "1. You are on Windows. Do NOT use Unix commands like `source`, `cat <<EOF`, or `ls`. Use Windows equivalents or use provided tools.\n"
        "2. To create or edit files, ALWAYS use the `write_file` tool. Do NOT use `echo` or `cat` in `run_command` to write files.\n"
        "3. Output the JSON block IMMEDIATELY. Do not put any text before or after it.\n"
        "4. After writing or editing a file, you MUST verify it works by running it or reading it!"
    )

    # One worked example turn, followed by the real task.
    conversation = (
        "User: Search the web for quantum computing.\n"
        "Rupert: {\"thought\": \"I need to search the web.\", \"plan\": [\"Search\"], \"tool\": \"web_search\", \"args\": {\"query\": \"quantum computing\"}}\n"
        f"User: {prompt}"
    )

    max_iterations = 40
    url = "http://127.0.0.1:11434/api/generate"

    # State for detecting the model issuing the same call repeatedly.
    last_tool = None
    last_args = None
    repeat_count = 0

    try:
        for i in range(max_iterations):
            print(f"\n--- Iteration {i+1} ---")
            print("Thinking...", end="\r")

            body = {"model": model_name, "prompt": f"{system_prompt}\n\n{conversation}", "stream": True, "format": "json"}
            text, reasoning_text = _stream_model_reply(url, body)
            print()  # Newline after stream

            # Prefer the answer text; fall back to the reasoning text only
            # when the answer holds no JSON object at all.
            json_str = extract_json(text) or extract_json(reasoning_text)
            if not json_str:
                print("\nNo tool call detected. Task complete or model answered directly.")
                break

            # Record the model's own action so the next prompt shows what
            # was already attempted, not just the resulting observations.
            conversation += f"\nRupert: {json_str}"

            try:
                tool_call = json.loads(json_str)
                tool_name = tool_call.get("tool")

                if not tool_name:
                    # A JSON reply that names no tool is not a tool call.
                    # Because the request forces JSON output, this is the
                    # usual way for the model to signal it has finished.
                    print("\nNo tool call detected. Task complete or model answered directly.")
                    break

                # Fix for string arguments instead of dict
                tool_args = _normalize_tool_args(tool_name, tool_call.get("args", {}))

                # State Tracker: Detect repeating actions
                if tool_name == last_tool and tool_args == last_args:
                    repeat_count += 1
                    if repeat_count >= 2:
                        conversation += "\n\nObservation: You are repeating the same action. If you have completed the task, please stop or move to the next step!"
                        print("\n[State Tracker] Detected repeating action. Warning injected.")
                else:
                    last_tool = tool_name
                    last_args = tool_args
                    repeat_count = 0

                if tool_name not in TOOLS:
                    print(f"\n[Error] Tool '{tool_name}' not found.")
                    conversation += f"\n\nObservation: Tool '{tool_name}' not found."
                    continue

                print(f"\n[Tool Execution] Invoking {tool_name} with {tool_args}")
                tool_result = TOOLS[tool_name](**tool_args)

                # Fallback for failed search. Only inspect string results so
                # a non-string return value is not misreported as a failure.
                search_failed = (
                    tool_name == "web_search"
                    and isinstance(tool_result, str)
                    and ("No results found" in tool_result or "failed" in tool_result.lower())
                )
                if search_failed:
                    print("[Fallback] Web search failed. Trying browser_browse with DuckDuckGo Lite...")
                    if "browser_browse" in TOOLS:
                        query_encoded = urllib.parse.quote(tool_args.get("query", ""))
                        # Author's note: this DuckDuckGo endpoint was chosen to avoid captchas.
                        tool_result = TOOLS["browser_browse"](url=f"https://html.duckduckgo.com/html/?q={query_encoded}")
                        print(f"[Fallback Result] {tool_result}")

                print(f"[Tool Result] {tool_result}")
                conversation += f"\n\nObservation (Result of {tool_name}):\n{tool_result}\n\nContinue with your task."
            except Exception as e:
                # Report the failure to the model and let it try again.
                print(f"\n[Error] Failed to execute tool: {e}")
                conversation += f"\n\nObservation: Error parsing or executing tool: {e}. Please retry with valid JSON."
    except Exception as e:
        # Network or routing failures end the run for this prompt.
        print(f"\nError during test: {e}")
if __name__ == "__main__":
    # Model requested by default; replaced below when Ollama does not list it.
    preferred_model = "granite4.1:8b"

    # Prompts to run, in order.
    tasks = [
        "Act as a full-stack developer. Create a mini web application. 1. Create a Python file named `app.py` with a simple Flask or HTTP server that serves a JSON list of products. 2. Create an HTML file named `index.html` with a clean UI that fetches that JSON and displays it in a grid. 3. Run the server in the background using `run_command`. 4. Use a python script to simulate a user fetching the API to verify it works! 5. Once verified, report success! Do not stop until you have verified it!"
    ]

    # Pick the preferred model when it is available, otherwise the first
    # model reported by get_ollama_models().
    model = preferred_model
    models = get_ollama_models()
    if models:
        print(f"Available models: {models}")
        if preferred_model not in models:
            model = models[0]
            print(f"Defaulting to available model: {model}")

    divider = "=================================================="
    task_count = len(tasks)
    for number, task in enumerate(tasks, start=1):
        print("\n" + divider)
        print(f"RUNNING TASK {number}/{task_count}")
        print(f"Task: {task}")
        print(divider)
        try:
            run_test(task, model_name=model)
        except Exception as e:
            # One failing task is reported and the remaining tasks still run.
            print(f"Task {number} failed with error: {e}")