-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_tests.py
More file actions
executable file
·190 lines (151 loc) · 5.82 KB
/
run_tests.py
File metadata and controls
executable file
·190 lines (151 loc) · 5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python3
"""Quick test script for LLMCompiler."""
import sys
sys.path.insert(0, '.')
def test_imports():
    """Smoke-test that every core project module is importable.

    Returns:
        True when all imports succeed, False (after printing the error)
        otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("Test 1: Module Imports")
    print(banner)
    try:
        # Import one representative name set per package to force module load.
        from src.models.task import Task, TaskStatus
        from src.models.state import GraphState
        from src.parsers.task_parser import parse_tasks
        from src.tools.registry import get_all_tools, get_tool_by_name
        print("✓ All modules imported successfully\n")
        return True
    except Exception as e:
        # Broad catch is deliberate: any import-time error should fail this test.
        print(f"✗ Import failed: {e}\n")
        return False
def test_task_model():
    """Exercise the Task model: construction, readiness, and status lifecycle.

    Returns:
        True when every check passes, False (with a traceback) otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("Test 2: Task Model")
    print(banner)
    try:
        from src.models.task import Task, TaskStatus

        # Construction: a dependency-free task starts out PENDING.
        t = Task(idx=1, name="search_tool", args={"query": "test"}, dependencies=[])
        assert t.idx == 1, "Task idx incorrect"
        assert t.name == "search_tool", "Task name incorrect"
        assert t.status == TaskStatus.PENDING, "Initial status should be PENDING"
        print(" ✓ Task creation works")

        # Readiness: no dependencies means immediately runnable.
        assert t.is_ready(set()), "Task with no dependencies should be ready"
        print(" ✓ Dependency checking works")

        # Lifecycle: PENDING -> RUNNING -> COMPLETED, with the result captured.
        t.mark_running()
        assert t.status == TaskStatus.RUNNING, "Status should be RUNNING"
        print(" ✓ Status changes work")
        t.mark_completed("test result")
        assert t.status == TaskStatus.COMPLETED, "Status should be COMPLETED"
        assert t.result == "test result", "Result should be stored"
        print(" ✓ Task completion works")

        # Readiness with several dependencies: all of them must be satisfied.
        blocked = Task(idx=2, name="test", args={}, dependencies=[1, 3])
        assert not blocked.is_ready({1}), "Should not be ready with incomplete dependencies"
        assert blocked.is_ready({1, 3}), "Should be ready with all dependencies"
        print(" ✓ Complex dependency checking works\n")
        return True
    except Exception as e:
        print(f"✗ Task model test failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def test_task_parser():
    """Check parse_tasks on both well-formed and malformed planner output.

    Returns:
        True when valid JSON parses and invalid input raises ValueError,
        False otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("Test 3: Task Parser")
    print(banner)
    try:
        from src.parsers.task_parser import parse_tasks

        # Happy path: a JSON plan embedded in surrounding prose must still parse.
        planner_output = '''Here is the plan:
[
{"idx": 1, "tool": "search_tool", "args": {"query": "test"}, "dependencies": []},
{"idx": 2, "tool": "math_tool", "args": {"expression": "2+2"}, "dependencies": [1]}
]
'''
        plan = parse_tasks(planner_output)
        assert len(plan) == 2, "Should parse 2 tasks"
        assert plan[0]["idx"] == 1, "First task idx should be 1"
        assert plan[1]["dependencies"] == [1], "Second task should depend on first"
        print(" ✓ Valid JSON parsing works")

        # Failure path: input with no JSON at all must raise ValueError.
        try:
            parse_tasks("No JSON here")
        except ValueError:
            print(" ✓ Invalid JSON rejection works\n")
            return True
        print(" ✗ Should have raised error for invalid JSON")
        return False
    except Exception as e:
        print(f"✗ Parser test failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def test_tools():
    """Validate the tool registry and the math tool's expression evaluation.

    Returns:
        True when registry lookups and both math invocations succeed,
        False (with a traceback) otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("Test 4: Tools System")
    print(banner)
    try:
        from src.tools.registry import get_all_tools, get_tool_by_name
        from src.tools.math import math_tool

        # The registry should expose at least the search and math tools.
        registry = get_all_tools()
        assert len(registry) >= 2, "Should have at least 2 tools"
        print(f" ✓ Found {len(registry)} tools in registry")

        # Name-based lookup must resolve a known tool.
        math = get_tool_by_name("math_tool")
        assert math is not None, "Should find math_tool"
        print(" ✓ Tool lookup works")

        # Simple and compound expressions both go through the math tool.
        simple = math_tool.invoke({"expression": "2 + 2"})
        assert "4" in simple, f"Math result should contain 4, got: {simple}"
        print(" ✓ Math tool works")
        compound = math_tool.invoke({"expression": "sqrt(16) * 3"})
        assert "12" in compound, f"Complex math should work, got: {compound}"
        print(" ✓ Complex math expressions work\n")
        return True
    except Exception as e:
        print(f"✗ Tools test failed: {e}\n")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run every test group in order and print a pass/fail summary.

    Returns:
        Process exit code: 0 when all groups passed, 1 when any failed.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("LLMCompiler Test Suite")
    print(banner + "\n")

    # Execute each group immediately, collecting (label, passed?) pairs.
    groups = [
        ("Imports", test_imports),
        ("Task Model", test_task_model),
        ("Task Parser", test_task_parser),
        ("Tools System", test_tools),
    ]
    results = [(label, runner()) for label, runner in groups]

    # Summary section: per-group verdicts followed by the aggregate count.
    print(banner)
    print("Test Summary")
    print(banner)
    passed = sum(1 for _, ok in results if ok)
    total = len(results)
    for label, ok in results:
        print(f"{'✓ PASS' if ok else '✗ FAIL'}: {label}")
    print("\n" + banner)
    print(f"Results: {passed}/{total} tests passed")
    print(banner + "\n")

    if passed == total:
        print("🎉 All tests passed! LLMCompiler is working correctly.")
        return 0
    print("⚠️ Some tests failed. Please review the errors above.")
    return 1
# Script entry point: run the suite and propagate its status code
# (0 = all tests passed, 1 = at least one failure) to the shell.
if __name__ == "__main__":
    sys.exit(main())