"""
A minimal wrapper that mimics the subset of the OpenAI Python
client used in Interactive Auto Agent System Tester.

Only implements:
    client.chat.completions.create(model=..., messages=[...], temperature=...)
and returns an object whose shape matches the OpenAI response
access pattern: resp.choices[0].message.content
"""

from __future__ import annotations

import json
from types import SimpleNamespace
from typing import Any, Dict, List


class OllamaClient:
    """
    Drop-in stand-in for ``openai.OpenAI`` backed by a local Ollama server.

    Example:
        client = OllamaClient(host="http://localhost:11434", model="llama2")
        resp = client.chat.completions.create(model="llama2", messages=[...])
        print(resp.choices[0].message.content)
    """

    def __init__(self, host: str = "http://localhost:11434", model: str = "llama2"):
        """
        :param host: Base URL of the Ollama server.  A bare ``host:port``
                     string is accepted and defaults to ``http://``.
        :param model: Model name used when a call does not pass ``model=``.
        """
        if not host.startswith(("http://", "https://")):
            host = "http://" + host
        self._host = host.rstrip("/")
        self._default_model = model
        # Expose nested namespaces so that usage mirrors the OpenAI client:
        # client.chat.completions.create(...)
        self.chat = SimpleNamespace(completions=SimpleNamespace(create=self._chat_create))

    # ------------------------------------------------------------------ #
    # internal helpers
    # ------------------------------------------------------------------ #
    def _chat_create(
        self,
        *,
        messages: List[Dict[str, str]],
        model: str | None = None,
        temperature: float | None = None,
        **kwargs: Any,
    ):
        """
        POST a chat request to Ollama's ``/api/chat`` endpoint and adapt the
        reply to the OpenAI response shape (``resp.choices[0].message.content``).

        :param messages: OpenAI-style list of ``{"role": ..., "content": ...}``.
        :param model: Optional per-call model override.  BUGFIX: callers (and
                      the class docstring example) pass ``model=`` on every
                      call; previously it fell into ``**kwargs`` and was
                      silently ignored, so the default model was always used.
        :param temperature: Forwarded via Ollama's ``options`` when given.
        :param kwargs: Other OpenAI keyword arguments; accepted and ignored.
        :raises ValueError: If no ``message`` object is found in the response.
        """
        # Imported lazily so this module stays importable (e.g. when the
        # ChatGPT backend is selected) even if requests is not installed.
        import requests

        payload = {
            "model": model or self._default_model,  # honour per-call override
            "messages": messages,
            "stream": False,
        }
        if temperature is not None:
            payload["options"] = {"temperature": temperature}

        r = requests.post(f"{self._host}/api/chat", json=payload, timeout=300)
        r.raise_for_status()

        # Even with stream=False the body may arrive as ND-JSON; take the
        # first line that carries the "message" object.
        for line in r.text.strip().splitlines():
            obj = json.loads(line)
            if "message" in obj:
                content = obj["message"]["content"]
                break
        else:
            raise ValueError("No message object found in Ollama response")

        # Mirror the OpenAI response object graph with SimpleNamespace.
        message = SimpleNamespace(content=content, role="assistant")
        choice = SimpleNamespace(message=message, index=0, finish_reason="stop")
        return SimpleNamespace(choices=[choice])
"singularity-exec"], default="docker" ) force_refresh = ( Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y" @@ -223,7 +239,14 @@ def build_system(a: Agent) -> str: display(console, "system", history[0]["content"]) display(console, "user", initial_user_message) - openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + if BACKEND_CHOICE == "chatgpt": + if not os.getenv("OPENAI_API_KEY"): + console.print("[red]OPENAI_API_KEY not set in .env") + sys.exit(1) + openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + else: + # Local Ollama needs no key; model defaults to “llama2” + openai = OpenAI(host=OLLAMA_HOST, model="deepseek-r1:70b") current_agent = agent turn = 0 diff --git a/benchmarking/prompt_testing/MultiAgentTester.py b/benchmarking/prompt_testing/MultiAgentTester.py index 3617330..1a5468e 100644 --- a/benchmarking/prompt_testing/MultiAgentTester.py +++ b/benchmarking/prompt_testing/MultiAgentTester.py @@ -31,18 +31,30 @@ from pathlib import Path from typing import List, Tuple, Optional, Dict +from benchmarking.prompt_testing.MultiAgentAutoTester import BACKEND_CHOICE from rich.table import Table +from rich.prompt import Prompt +BACKEND_CHOICE = Prompt.ask( + "LLM backend", + choices=["chatgpt", "ollama"], + default="chatgpt", +) +OLLAMA_HOST = "http://localhost:11434" +if BACKEND_CHOICE == "ollama": + OLLAMA_HOST = Prompt.ask( + "Ollama base URL", + default="http://localhost:11434", + ) # ── Dependencies ------------------------------------------------------------ try: from dotenv import load_dotenv from openai import OpenAI, APIError import requests from rich.console import Console - from rich.prompt import Prompt + except ImportError as e: print(f"Missing dependency: {e}", file=sys.stderr) sys.exit(1) - # ── Agent framework --------------------------------------------------------- try: from benchmarking.agents.AgentSystem import AgentSystem, Agent @@ -78,7 +90,7 @@ # 
=========================================================================== # 1 · Backend selection # =========================================================================== -backend = Prompt.ask("Choose backend", choices=["docker", "singularity", "singularity-exec"], default="docker") +backend = Prompt.ask("Choose sandbox backend", choices=["docker", "singularity", "singularity-exec"], default="docker") force_refresh = Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y" is_exec_mode = backend == "singularity-exec" @@ -171,7 +183,14 @@ def build_system(a: Agent) -> str: display(console, "system", history[0]["content"]) display(console, "user", first_user) - openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + if BACKEND_CHOICE == "chatgpt": + if not os.getenv("OPENAI_API_KEY"): + console.print("[red]OPENAI_API_KEY not set in .env") + sys.exit(1) + openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + else: + # Local Ollama needs no key; model defaults to “llama2” + openai = OpenAI(host=OLLAMA_HOST) current_agent = agent turn = 0