"""
Minimal wrapper that mimics the subset of the OpenAI Python client used by
the Interactive Auto Agent System Tester.

Only implements:
    client.chat.completions.create(model=..., messages=[...], temperature=...)
and returns an object whose shape matches the OpenAI response access pattern:
    resp.choices[0].message.content
"""

from types import SimpleNamespace
from typing import Any, Dict, List, Optional


class OllamaClient:
    """
    Drop-in stand-in for ``openai.OpenAI`` backed by a local Ollama server.

    Example:
        client = OllamaClient(host="http://localhost:11434", default_model="llama2")
        resp = client.chat.completions.create(model="llama2", messages=[...])
        print(resp.choices[0].message.content)
    """

    def __init__(self, host: str = "http://localhost:11434", default_model: str = "llama2"):
        # Tolerate a bare "host:port" value (e.g. "localhost:11434"):
        # requests rejects URLs that lack a scheme.
        if not host.startswith(("http://", "https://")):
            host = "http://" + host
        self._host = host.rstrip("/")
        self._default_model = default_model
        # Expose nested namespaces so call sites mirror the OpenAI client:
        # client.chat.completions.create(...)
        self.chat = SimpleNamespace(completions=SimpleNamespace(create=self._chat_create))

    # ------------------------------------------------------------------ #
    # internal helpers
    # ------------------------------------------------------------------ #
    def _chat_create(
        self,
        *,
        model: Optional[str] = None,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        **kwargs: Any,
    ):
        """POST /api/chat and wrap the response to look like OpenAI's.

        Args:
            model: Ollama model name; falls back to the client default.
            messages: OpenAI-style ``[{"role": ..., "content": ...}]`` list.
            temperature: Optional sampling temperature, forwarded via "options".
            **kwargs: Ignored; accepted only so OpenAI call sites don't break.

        Returns:
            An object supporting ``resp.choices[0].message.content``.

        Raises:
            requests.HTTPError: On a non-2xx response from the server.
        """
        # Imported lazily so merely constructing the client (e.g. when the
        # chatgpt backend is selected elsewhere) does not require requests.
        import requests

        payload: Dict[str, Any] = {
            "model": model or self._default_model,
            "messages": messages,
            # /api/chat streams ND-JSON by default; request a single JSON
            # object so r.json() below cannot fail on a multi-line body.
            "stream": False,
        }
        if temperature is not None:
            payload["options"] = {"temperature": temperature}

        r = requests.post(f"{self._host}/api/chat", json=payload, timeout=300)
        r.raise_for_status()
        data = r.json()  # -> {"message": {...}, "done": true}

        content = data["message"]["content"]
        # Fabricate an OpenAI-shaped object tree.
        message = SimpleNamespace(content=content, role="assistant")
        choice = SimpleNamespace(message=message, index=0, finish_reason="stop")
        return SimpleNamespace(choices=[choice])
import OpenAI, APIError + if BACKEND_CHOICE == "ollama": + from benchmarking.core.ollama_wrapper import OllamaClient as OpenAI + APIError = Exception # Ollama does not have a specific APIError + else: + from openai import OpenAI, APIError import requests from rich.console import Console - from rich.prompt import Prompt except ImportError as e: print(f"Missing dependency: {e}", file=sys.stderr) sys.exit(1) @@ -65,7 +75,7 @@ # 1 · Backend selection # =========================================================================== backend = Prompt.ask( - "Choose backend", choices=["docker", "singularity", "singularity-exec"], default="docker" + "Choose sandbox backend", choices=["docker", "singularity", "singularity-exec"], default="docker" ) force_refresh = ( Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y" diff --git a/benchmarking/prompt_testing/MultiAgentTester.py b/benchmarking/prompt_testing/MultiAgentTester.py index 35ce36b..35a3fa6 100644 --- a/benchmarking/prompt_testing/MultiAgentTester.py +++ b/benchmarking/prompt_testing/MultiAgentTester.py @@ -31,6 +31,7 @@ from pathlib import Path from typing import List, Tuple, Optional, Dict +from benchmarking.prompt_testing.MultiAgentAutoTester import BACKEND_CHOICE from rich.table import Table # ── Dependencies ------------------------------------------------------------ try: @@ -78,7 +79,7 @@ # =========================================================================== # 1 · Backend selection # =========================================================================== -backend = Prompt.ask("Choose backend", choices=["docker", "singularity", "singularity-exec"], default="docker") +backend = Prompt.ask("Choose sandbox backend", choices=["docker", "singularity", "singularity-exec"], default="docker") force_refresh = Prompt.ask("Force refresh environment?", choices=["y", "n"], default="n").lower() == "y" is_exec_mode = backend == "singularity-exec" @@ -171,7 +172,14 @@ def 
class OllamaClient:
    """
    Drop-in stand-in for ``openai.OpenAI`` backed by a local Ollama server.

    Mimics the OpenAI access pattern:
        resp = client.chat.completions.create(model="llama2", messages=[...])
        print(resp.choices[0].message.content)
    """

    def __init__(self, host: str = "http://localhost:11434", model: str = "llama2"):
        # Tolerate a bare "host:port" value (e.g. "localhost:11434"):
        # requests rejects URLs that lack a scheme.
        if not host.startswith(("http://", "https://")):
            host = "http://" + host
        self._host = host.rstrip("/")
        self._default_model = model
        # Expose nested namespaces so call sites mirror the OpenAI client:
        # client.chat.completions.create(...)
        self.chat = SimpleNamespace(completions=SimpleNamespace(create=self._chat_create))

    # ------------------------------------------------------------------ #
    # internal helpers
    # ------------------------------------------------------------------ #
    def _chat_create(
        self,
        *,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        temperature: Optional[float] = None,
        **kwargs: Any,
    ):
        """POST /api/chat and wrap the response to look like OpenAI's.

        Args:
            messages: OpenAI-style ``[{"role": ..., "content": ...}]`` list.
            model: Per-call model override (OpenAI style); falls back to the
                client default when omitted.
            temperature: Optional sampling temperature, forwarded via "options".
            **kwargs: Ignored; accepted only so OpenAI call sites don't break.

        Returns:
            An object supporting ``resp.choices[0].message.content``.

        Raises:
            requests.HTTPError: On a non-2xx response from the server.
            ValueError: If no "message" object appears in the response body.
        """
        payload: Dict[str, Any] = {
            # Honor the per-call model kwarg; previously it was silently
            # swallowed by **kwargs, breaking OpenAI-compatible call sites.
            "model": model or self._default_model,
            "messages": messages,
            # Ask for a single JSON object rather than an ND-JSON stream.
            "stream": False,
        }
        if temperature is not None:
            payload["options"] = {"temperature": temperature}

        r = requests.post(f"{self._host}/api/chat", json=payload, timeout=300)
        r.raise_for_status()

        # Parse line-by-line so both a single JSON object and a leftover
        # ND-JSON stream body are handled.
        for line in r.text.strip().splitlines():
            obj = json.loads(line)
            if "message" in obj:
                content = obj["message"]["content"]
                break
        else:
            raise ValueError("No message object found in Ollama response")

        # Fabricate an OpenAI-shaped object tree.
        message = SimpleNamespace(content=content, role="assistant")
        choice = SimpleNamespace(message=message, index=0, finish_reason="stop")
        return SimpleNamespace(choices=[choice])
a/benchmarking/prompt_testing/MultiAgentAutoTester.py +++ b/benchmarking/prompt_testing/MultiAgentAutoTester.py @@ -25,6 +25,12 @@ choices=["chatgpt", "ollama"], default="chatgpt", ) +OLLAMA_HOST = "http://localhost:11434" +if BACKEND_CHOICE == "ollama": + OLLAMA_HOST = Prompt.ask( + "Ollama base URL", + default="http://localhost:11434", + ) # ── Dependencies ------------------------------------------------------------ try: from dotenv import load_dotenv @@ -233,7 +239,14 @@ def build_system(a: Agent) -> str: display(console, "system", history[0]["content"]) display(console, "user", initial_user_message) - openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + if BACKEND_CHOICE == "chatgpt": + if not os.getenv("OPENAI_API_KEY"): + console.print("[red]OPENAI_API_KEY not set in .env") + sys.exit(1) + openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + else: + # Local Ollama needs no key; model defaults to “llama2” + openai = OpenAI(host=OLLAMA_HOST, model="deepseek-r1:70b") current_agent = agent turn = 0 diff --git a/benchmarking/prompt_testing/MultiAgentTester.py b/benchmarking/prompt_testing/MultiAgentTester.py index bb665ac..1a5468e 100644 --- a/benchmarking/prompt_testing/MultiAgentTester.py +++ b/benchmarking/prompt_testing/MultiAgentTester.py @@ -33,17 +33,28 @@ from benchmarking.prompt_testing.MultiAgentAutoTester import BACKEND_CHOICE from rich.table import Table +from rich.prompt import Prompt +BACKEND_CHOICE = Prompt.ask( + "LLM backend", + choices=["chatgpt", "ollama"], + default="chatgpt", +) +OLLAMA_HOST = "http://localhost:11434" +if BACKEND_CHOICE == "ollama": + OLLAMA_HOST = Prompt.ask( + "Ollama base URL", + default="http://localhost:11434", + ) # ── Dependencies ------------------------------------------------------------ try: from dotenv import load_dotenv from openai import OpenAI, APIError import requests from rich.console import Console - from rich.prompt import Prompt + except ImportError as e: print(f"Missing dependency: {e}", 
file=sys.stderr) sys.exit(1) - # ── Agent framework --------------------------------------------------------- try: from benchmarking.agents.AgentSystem import AgentSystem, Agent @@ -179,7 +190,7 @@ def build_system(a: Agent) -> str: openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) else: # Local Ollama needs no key; model defaults to “llama2” - openai = OpenAI() + openai = OpenAI(host=OLLAMA_HOST) current_agent = agent turn = 0