65 changes: 65 additions & 0 deletions benchmarking/core/ollama_wrapper.py
@@ -0,0 +1,65 @@
"""
A minimal wrapper that mimics the subset of the OpenAI Python
client used by the Interactive Auto Agent System Tester.

Only implements:
client.chat.completions.create(model=..., messages=[...], temperature=...)
and returns an object whose shape matches the OpenAI response
access pattern: resp.choices[0].message.content
"""

from __future__ import annotations
import requests
from types import SimpleNamespace
from typing import List, Dict, Any
import json

class OllamaClient:
    """
    Example:
        client = OllamaClient(host="http://localhost:11434", model="llama2")
        resp = client.chat.completions.create(model="llama2", messages=[...])
        print(resp.choices[0].message.content)
    """

    def __init__(self, host: str = "http://localhost:11434", model: str = "llama2"):
        if not host.startswith(("http://", "https://")):  # accept a bare host:port
            host = "http://" + host
        self._host = host.rstrip("/")
        self._default_model = model
        # expose nested namespaces so that usage mirrors openai.ChatCompletion
        self.chat = SimpleNamespace(completions=SimpleNamespace(create=self._chat_create))

    # ------------------------------------------------------------------ #
    # internal helpers
    # ------------------------------------------------------------------ #
    def _chat_create(
        self,
        *,
        model: str | None = None,
        messages: List[Dict[str, str]],
        temperature: float | None = None,
        **kwargs: Any,  # absorb unused OpenAI-style arguments
    ):
        payload = {
            "model": model or self._default_model,
            "messages": messages,
            "stream": False,
        }
        if temperature is not None:
            payload["options"] = {"temperature": temperature}

        r = requests.post(f"{self._host}/api/chat", json=payload, timeout=300)
        r.raise_for_status()

        # stream=False yields a single JSON object; tolerate ND-JSON too
        for line in r.text.strip().splitlines():
            obj = json.loads(line)
            if "message" in obj:
                content = obj["message"]["content"]
                break
        else:
            raise ValueError("No message object found in Ollama response")

        message = SimpleNamespace(content=content, role="assistant")
        choice = SimpleNamespace(message=message, index=0, finish_reason="stop")
        return SimpleNamespace(choices=[choice])
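
A minimal usage sketch of the wrapper above, assuming a local Ollama server on the default port; the prompt text and model name are illustrative:

from benchmarking.core.ollama_wrapper import OllamaClient

# A bare host:port is accepted -- the constructor prepends http:// itself.
client = OllamaClient(host="localhost:11434", model="llama2")
resp = client.chat.completions.create(
    messages=[{"role": "user", "content": "Reply with one word."}],
    temperature=0.2,
)
print(resp.choices[0].message.content)  # same access pattern as the OpenAI client
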
31 changes: 27 additions & 4 deletions benchmarking/prompt_testing/MultiAgentAutoTester.py
@@ -18,13 +18,29 @@
from typing import List, Tuple, Optional, Dict

from rich.table import Table
# ── Pick LLM backend --------------------------------------------------------
from rich.prompt import Prompt
BACKEND_CHOICE = Prompt.ask(
    "LLM backend",
    choices=["chatgpt", "ollama"],
    default="chatgpt",
)
OLLAMA_HOST = "http://localhost:11434"
if BACKEND_CHOICE == "ollama":
    OLLAMA_HOST = Prompt.ask(
        "Ollama base URL",
        default="http://localhost:11434",
    )
# ── Dependencies ------------------------------------------------------------
try:
    from dotenv import load_dotenv
    from openai import OpenAI, APIError
    if BACKEND_CHOICE == "ollama":
        from benchmarking.core.ollama_wrapper import OllamaClient as OpenAI
        APIError = Exception  # Ollama does not have a specific APIError
    else:
        from openai import OpenAI, APIError
    import requests
    from rich.console import Console
    from rich.prompt import Prompt
except ImportError as e:
    print(f"Missing dependency: {e}", file=sys.stderr)
    sys.exit(1)
@@ -104,7 +120,7 @@ def _save_benchmark_record(*, run_id: str, results: dict, meta: dict, code: str
# 1 · Backend selection
# ===========================================================================
backend = Prompt.ask(
    "Choose backend", choices=["docker", "singularity", "singularity-exec"], default="docker"
    "Choose sandbox backend", choices=["docker", "singularity", "singularity-exec"], default="docker"
)
force_refresh = (
    Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y"
@@ -223,7 +239,14 @@ def build_system(a: Agent) -> str:
display(console, "system", history[0]["content"])
display(console, "user", initial_user_message)

openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if BACKEND_CHOICE == "chatgpt":
    if not os.getenv("OPENAI_API_KEY"):
        console.print("[red]OPENAI_API_KEY not set in .env")
        sys.exit(1)
    openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
else:
    # Local Ollama needs no API key; the wrapper's "llama2" default is overridden here
    openai = OpenAI(host=OLLAMA_HOST, model="deepseek-r1:70b")
current_agent = agent
turn = 0

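The pattern that keeps the rest of the tester untouched is the import alias: the wrapper is bound to the name OpenAI, so every call site stays backend-agnostic. A minimal sketch of the idea, reusing names from the diff:

BACKEND_CHOICE = "ollama"  # normally set via Prompt.ask above

if BACKEND_CHOICE == "ollama":
    from benchmarking.core.ollama_wrapper import OllamaClient as OpenAI
    APIError = Exception  # no backend-specific error class to catch
else:
    from openai import OpenAI, APIError

# Either branch binds a class exposing chat.completions.create(...),
# so downstream code never needs to branch on the backend again.
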
27 changes: 23 additions & 4 deletions benchmarking/prompt_testing/MultiAgentTester.py
@@ -31,18 +31,30 @@
from pathlib import Path
from typing import List, Tuple, Optional, Dict

from benchmarking.prompt_testing.MultiAgentAutoTester import BACKEND_CHOICE
from rich.table import Table
from rich.prompt import Prompt
BACKEND_CHOICE = Prompt.ask(
    "LLM backend",
    choices=["chatgpt", "ollama"],
    default="chatgpt",
)
OLLAMA_HOST = "http://localhost:11434"
if BACKEND_CHOICE == "ollama":
    OLLAMA_HOST = Prompt.ask(
        "Ollama base URL",
        default="http://localhost:11434",
    )
# ── Dependencies ------------------------------------------------------------
try:
    from dotenv import load_dotenv
    if BACKEND_CHOICE == "ollama":
        from benchmarking.core.ollama_wrapper import OllamaClient as OpenAI
        APIError = Exception  # Ollama does not have a specific APIError
    else:
        from openai import OpenAI, APIError
    import requests
    from rich.console import Console
    from rich.prompt import Prompt

except ImportError as e:
    print(f"Missing dependency: {e}", file=sys.stderr)
    sys.exit(1)

# ── Agent framework ---------------------------------------------------------
try:
    from benchmarking.agents.AgentSystem import AgentSystem, Agent
@@ -78,7 +90,7 @@
# ===========================================================================
# 1 · Backend selection
# ===========================================================================
backend = Prompt.ask("Choose backend", choices=["docker", "singularity", "singularity-exec"], default="docker")
backend = Prompt.ask("Choose sandbox backend", choices=["docker", "singularity", "singularity-exec"], default="docker")
force_refresh = Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y"
is_exec_mode = backend == "singularity-exec"

@@ -171,7 +183,14 @@ def build_system(a: Agent) -> str:
display(console, "system", history[0]["content"])
display(console, "user", first_user)

openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
if BACKEND_CHOICE == "chatgpt":
    if not os.getenv("OPENAI_API_KEY"):
        console.print("[red]OPENAI_API_KEY not set in .env")
        sys.exit(1)
    openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
else:
    # Local Ollama needs no key; model defaults to "llama2"
    openai = OpenAI(host=OLLAMA_HOST)
current_agent = agent
turn = 0

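
A side effect of aliasing APIError to Exception is that existing except APIError handlers keep working on the Ollama path, only more broadly. A hedged sketch of such a handler; the surrounding names follow the diff, but the handler itself is illustrative:

import sys

try:
    resp = openai.chat.completions.create(
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)
except APIError as exc:  # openai.APIError, or plain Exception under Ollama
    print(f"LLM call failed: {exc}", file=sys.stderr)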