Merged

41 commits
aa981b9
Introduce ollama-local support to Microbots
0xba1a Dec 1, 2025
9a83ba6
Add step to clean disk space
0xba1a Dec 1, 2025
3a0de1d
Use small coding model instead of qwen3 which is 20 GiB
0xba1a Dec 1, 2025
81720d3
Replace "result" with "thoughts" in mock messages
0xba1a Dec 1, 2025
702f340
Fix member name
0xba1a Dec 1, 2025
54da8ea
Update test/bot/test_reading_bot.py
0xba1a Dec 1, 2025
0b54b25
Update test/llm/README_OLLAMA_TESTING.md
0xba1a Dec 1, 2025
76e533e
Update src/microbots/llm/ollama_local.py
0xba1a Dec 1, 2025
fd6dc16
Update src/microbots/llm/ollama_local.py
0xba1a Dec 1, 2025
5ca0c3d
Add ollama-local support to Microbots (#75)
Copilot Dec 1, 2025
5518642
Fix unit test failure
0xba1a Dec 1, 2025
56d18b2
change "result" to "thoughts" in test_llm.py
0xba1a Dec 1, 2025
c6c9c22
Add invalid response unit test to test the exception handling code
0xba1a Dec 1, 2025
1d81a8e
fix test_llm failure and modify it as unit test
0xba1a Dec 1, 2025
2fee832
Update tests to install ollama if it is not available
0xba1a Dec 1, 2025
96a4463
Pass model name and port from test code to llm using environmental va…
0xba1a Dec 1, 2025
28f70e2
Use qwen3 model from a constant and update test.yml to sqeeze space f…
0xba1a Dec 1, 2025
03a334c
Update unit test based on changes made
0xba1a Dec 1, 2025
a593591
Use a smaller model for testing in GitHub runner
0xba1a Dec 1, 2025
24bccd5
Disable the clean-up code as using smaller model
0xba1a Dec 1, 2025
a0c41c3
Update 2bot test to run faster
0xba1a Dec 2, 2025
597535f
Run only ollama tests for faster check
0xba1a Dec 2, 2025
bb0f1c7
Further modifications to run only ollama tests
0xba1a Dec 2, 2025
1da6d80
Pass json requirement as part of user message
0xba1a Dec 2, 2025
a35de15
Add a brief timeout for the model to be ready after being pulled
0xba1a Dec 2, 2025
4ec2208
Fix Ollama test timeout on CPU-only CI runners (#77)
Copilot Dec 2, 2025
f2beacf
Install ollama model using Marketplace action
0xba1a Dec 2, 2025
6d97c87
Increase timeout for local model based tests
0xba1a Dec 2, 2025
568c406
Increase response timeout for local timeout
0xba1a Dec 2, 2025
8bbb397
Increase timeout to 10 minutes
0xba1a Dec 2, 2025
d3b74a2
Test mistral model
0xba1a Dec 2, 2025
e522335
Try qwen2.5-coder
0xba1a Dec 2, 2025
65c24a7
Try mistral with new system heuristics
0xba1a Dec 2, 2025
8690621
Flexible verification of ollama tests
0xba1a Dec 2, 2025
56df556
Handle non-json response of local model
0xba1a Dec 2, 2025
5ade38d
Run ollama_local tests separately
0xba1a Dec 2, 2025
f2270b8
Fix retry check logic and append llm message before adding user corre…
0xba1a Dec 2, 2025
b3bc232
add back rerun in test.yml
0xba1a Dec 2, 2025
6b57a8a
Ignore even the json exception from local llm
0xba1a Dec 2, 2025
a112886
Disable the final assert also
0xba1a Dec 2, 2025
dddff42
Remove unnecessary else case in _create_llm function
0xba1a Dec 3, 2025
42 changes: 39 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -13,15 +13,30 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
test-type: ["unit", "integration"]
# Installing an Ollama model in the GitHub Actions runner requires significant disk space.
# It reduces the space available for browser-based tests.
test-type: ["unit", "integration", "ollama_local"]
include:
- test-type: "unit"
pytest-args: "-m 'unit'"
- test-type: "integration"
pytest-args: "-m 'integration'"
- test-type: "ollama_local"
pytest-args: "-m 'ollama_local'"


steps:

# Keeping it here when we need to free up space in future
# - name: Free up space
# uses: jlumbroso/free-disk-space@main
# with:
# tool-cache: true
# android: true
# dotnet: true
# haskell: true
# large-packages: true

- name: Checkout code
uses: actions/checkout@v4

@@ -31,7 +46,7 @@ jobs:
python-version: "3.12"

- name: Set up Docker Buildx
if: matrix.test-type == 'integration'
if: matrix.test-type != 'unit'
uses: docker/setup-buildx-action@v3

- name: Cache pip dependencies
@@ -58,11 +73,32 @@
pip install -e .

- name: Build Docker images for integration tests
if: matrix.test-type == 'integration'
if: matrix.test-type != 'unit'
run: |
# Build the shell server image needed for Docker tests
docker build -f src/microbots/environment/local_docker/image_builder/Dockerfile -t kavyasree261002/shell_server:latest .

- name: Check disk space before ollama installation
if: matrix.test-type == 'ollama_local'
run: df -h

- name: Run model
uses: ai-action/ollama-action@v1
id: model
if: matrix.test-type == 'ollama_local'
with:
model: qwen2.5-coder:latest
prompt: Hi, Are you running? What is your model name?

- name: Check disk space after ollama installation
if: matrix.test-type == 'ollama_local'
run: df -h

- name: Print response
run: echo "$response"
env:
response: ${{ steps.model.outputs.response }}

- name: Run ${{ matrix.test-type }} tests
env:
# OpenAI API Configuration
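The new `ollama_local` matrix leg assumes an Ollama server is reachable on the runner. For local development, a guard of this shape lets the same tests skip cleanly when no server answers; this is a sketch, and `ollama_available` is an illustrative helper, not part of this PR:

```python
import urllib.error
import urllib.request

OLLAMA_PORT = 11434  # Ollama's default port, matching LOCAL_MODEL_PORT below


def ollama_available(port: int = OLLAMA_PORT) -> bool:
    """Return True if a local Ollama server answers on the given port."""
    try:
        # /api/tags is a cheap endpoint that just lists installed models.
        with urllib.request.urlopen(
            f"http://localhost:{port}/api/tags", timeout=2
        ) as resp:
            return resp.status == 200
    except (urllib.error.URLError, OSError):
        return False
```

Tests marked `ollama_local` could then be wrapped in `pytest.mark.skipif(not ollama_available(), ...)` rather than failing outright on machines without Ollama.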
4 changes: 3 additions & 1 deletion .vscode/settings.json
@@ -1,6 +1,8 @@
{
"cSpell.words": [
"microbot",
"microbots"
"microbots",
"ollama",
"qwen"
]
}
41 changes: 31 additions & 10 deletions src/microbots/MicroBot.py
@@ -11,6 +11,7 @@
LocalDockerEnvironment,
)
from microbots.llm.openai_api import OpenAIApi
from microbots.llm.ollama_local import OllamaLocal
from microbots.llm.llm import llm_output_format_str
from microbots.tools.tool import Tool, install_tools, setup_tools
from microbots.extras.mount import Mount, MountType
@@ -19,15 +20,30 @@

logger = getLogger(" MicroBot ")

system_prompt_common = f"""There is a shell session open for you.
I will provide a task to achieve using the shell.
You will provide the commands to achieve the task in this particular below json format, Ensure all the time to respond in this format only and nothing else, also all the properties ( task_done, command, result ) are mandatory on each response
{llm_output_format_str}
after each command I will provide the output of the command.
ensure to run only one command at a time.
NEVER use 'ls -R', 'tree', or 'find' without -maxdepth on large repos - use targeted paths like 'ls drivers/block/' to avoid exceeding context limits.
Use specific patterns: 'find <path> -name "*.c" -maxdepth 2' instead of recursive exploration.
I won't be able to intervene once I have given task."""
system_prompt_common = f"""
You are a helpful agent well versed in software development and debugging.

You will be provided with a coding or debugging task to complete inside a sandboxed shell environment.
There is a shell session open for you.
You will be provided with a task and you should achieve it using the shell commands.
All your response must be in the following json format:
{llm_output_format_str}
The properties ( task_done, thoughts, command ) are mandatory on each response.
Give the command one at a time to solve the given task. As long as you're not done with the task, set task_done to false.
When you are sure that the task is completed, set task_done to true, set command to empty string and provide your final thoughts in the thoughts field.
Don't add any chat or extra messages outside the json format. Because the system will parse only the json response.
Any of your thoughts must be in the 'thoughts' field.

after each command, the system will execute the command and respond to you with the output.
Ensure to run only one command at a time.
NEVER use commands that produce large amounts of output or take a long time to run to avoid exceeding context limits.
Use specific patterns: 'find <path> -name "*.c" -maxdepth 2' instead of recursive exploration.
No human is involved in the task. So, don't seek human intervention.

Remember following important points
1. If a command fails, analyze the error message and provide an alternative command in your next response. Same command will not pass again.
2. Avoid using recursive commands like 'ls -R', 'rm -rf', 'tree', or 'find' without depth limits as they can produce excessive output or be destructive.
"""


class BotType(StrEnum):
@@ -224,7 +240,7 @@ def run(
llm_response = self.llm.ask(output_text)

logger.info("🔚 TASK COMPLETED : %s...", task[0:15])
return BotRunResult(status=True, result=llm_response.result, error=None)
return BotRunResult(status=True, result=llm_response.thoughts, error=None)

def _mount_additional(self, mount: Mount):
if mount.mount_type != MountType.COPY:
@@ -259,6 +275,11 @@ def _create_llm(self):
self.llm = OpenAIApi(
system_prompt=self.system_prompt, deployment_name=self.deployment_name
)
elif self.model_provider == ModelProvider.OLLAMA_LOCAL:
self.llm = OllamaLocal(
system_prompt=self.system_prompt, model_name=self.deployment_name
)
# No Else case required as model provider is already validated using _validate_model_and_provider

def _validate_model_and_provider(self, model):
# Ensure it has only one slash
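The rewritten system prompt pins the model to a three-field JSON contract. A quick sketch of what a conforming intermediate reply and a conforming final reply look like (the field values here are illustrative):

```python
import json

# An intermediate step: task not done yet, one shell command proposed.
step = {
    "task_done": False,
    "thoughts": "List the repository root to locate the build files.",
    "command": "ls",
}

# The final reply: task_done true, empty command, summary in thoughts.
final = {
    "task_done": True,
    "thoughts": "The build files are present; the task is complete.",
    "command": "",
}

for reply in (step, final):
    # Round-trip through JSON, as the bot's parser will.
    parsed = json.loads(json.dumps(reply))
    assert set(parsed) == {"task_done", "thoughts", "command"}
```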
1 change: 1 addition & 0 deletions src/microbots/constants.py
@@ -4,6 +4,7 @@

class ModelProvider(StrEnum):
OPENAI = "azure-openai"
OLLAMA_LOCAL = "ollama-local"


class ModelEnum(StrEnum):
17 changes: 9 additions & 8 deletions src/microbots/llm/llm.py
@@ -5,20 +5,21 @@

logger = getLogger(__name__)

@dataclass
class LLMAskResponse:
task_done: bool = False
command: str = ""
result: str | None = None

llm_output_format_str = """
{
"task_done": <bool>, // Indicates if the task is completed
"command": <str>, // The command to be executed
"result": <str|null> // The result of the command execution, null if not applicable
"thoughts": <str>, // The reasoning behind the decision
"command": <str> // The command to be executed
}
"""

@dataclass
class LLMAskResponse:
task_done: bool = False
thoughts: str = ""
command: str = ""

class LLMInterface(ABC):
@abstractmethod
def ask(self, message: str) -> LLMAskResponse:
@@ -75,7 +76,7 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
llm_response = LLMAskResponse(
task_done=response_dict["task_done"],
command=response_dict["command"],
result=response_dict.get("result"),
thoughts=response_dict.get("thoughts"),
)
return True, llm_response
else:
126 changes: 126 additions & 0 deletions src/microbots/llm/ollama_local.py
@@ -0,0 +1,126 @@
###############################################################################
################### Ollama Local LLM Interface Setup ##########################
###############################################################################
#
# Install Ollama from https://ollama.com/
# ```
# curl -fsSL https://ollama.com/install.sh | sh
# ollama --version
# ```
#
# Pull and run a local model (e.g., qwen3-coder:latest)
# ```
# ollama pull qwen3-coder:latest
# OLLAMA_HOST=127.0.0.1:11434 ollama serve
# ```
#
# Set environment variables in a .env file or your system environment:
# ```
# LOCAL_MODEL_NAME=qwen3-coder:latest
# LOCAL_MODEL_PORT=11434
# ```
#
# To use with Microbot, define your Microbot as follows
# ```python
# bot = Microbot(
# model="ollama-local/qwen3-coder:latest",
# folder_to_mount=str(test_repo)
# )
# ```
###############################################################################

import json
import os
from dataclasses import asdict

from dotenv import load_dotenv
from microbots.llm.llm import LLMAskResponse, LLMInterface, llm_output_format_str
import requests
import logging

logger = logging.getLogger(__name__)

load_dotenv()

class OllamaLocal(LLMInterface):
def __init__(self, system_prompt, model_name=None, model_port=None, max_retries=3):
self.model_name = model_name or os.environ.get("LOCAL_MODEL_NAME")
self.model_port = model_port or os.environ.get("LOCAL_MODEL_PORT")
self.system_prompt = system_prompt
self.messages = [{"role": "system", "content": system_prompt}]

if not self.model_name or not self.model_port:
raise ValueError("LOCAL_MODEL_NAME and LOCAL_MODEL_PORT environment variables must be set or passed as arguments to OllamaLocal.")

# Set these values here. This logic will be handled in the parent class.
self.max_retries = max_retries
self.retries = 0

def ask(self, message) -> LLMAskResponse:
self.retries = 0 # reset retries for each ask. Handled in parent class.

self.messages.append({"role": "user", "content": message})

# TODO: If the retry count is maintained here, all the wrong responses from the history
# can be removed. It will be a natural history cleaning process.
valid = False
while not valid and self.retries < self.max_retries:
response = self._send_request_to_local_model(self.messages)
self.messages.append({"role": "assistant", "content": response})
valid, askResponse = self._validate_llm_response(response=response)

if not valid and self.retries >= self.max_retries:
raise Exception("Max retries reached. Failed to get valid response from local model.")

# Remove last assistant message and replace with structured response
self.messages.pop()
self.messages.append({"role": "assistant", "content": json.dumps(asdict(askResponse))})

return askResponse

def clear_history(self):
self.messages = [
{
"role": "system",
"content": self.system_prompt,
}
]
return True

def _send_request_to_local_model(self, messages):
logger.debug(f"Sending request to local model {self.model_name} at port {self.model_port}")
logger.debug(f"Messages: {messages}")
server = f"http://localhost:{self.model_port}/api/generate"
payload = {
"model": self.model_name,
"prompt": json.dumps(messages),
"stream": False
}
headers = {
"Content-Type": "application/json"
}
# Set timeout: 30 seconds connect, 600 seconds read to handle model cold start
response = requests.post(server, json=payload, headers=headers, timeout=(30, 600))
logger.debug(f"\nResponse Code: {response.status_code}\nResponse Text:\n{response.text}\n---")
if response.status_code == 200:
response_json = response.json()
logger.debug(f"\nResponse JSON: {response_json}")
return response_json.get("response", "")
else:
raise Exception(f"Error from local model server: {response.status_code} - {response.text}")

def _validate_llm_response(self, response):
# Even when instructed otherwise, Ollama does not always respond with JSON only.
# It sometimes adds extra text above or below the JSON.
# So, this hack extracts the JSON part from the response.
try:
response = response.split("{", 1)[1]
response = "{" + response.rsplit("}", 1)[0] + "}"
except Exception as e:
self.retries += 1
logger.warning("No JSON in LLM response.\nException: %s\nRetrying... (%d/%d)", e, self.retries, self.max_retries)
self.messages.append({"role": "user", "content": "LLM_RES_ERROR: Please respond in the following JSON format.\n" + llm_output_format_str})
return False, None

logger.debug(f"\nResponse from local model: {response}")
return super()._validate_llm_response(response)
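The extraction hack above slices from the first `{` to the last `}`. The same split/rsplit logic as a plain function, for clarity (`extract_json_block` is an illustrative name):

```python
def extract_json_block(text: str) -> str:
    """Cut the outermost {...} span out of a reply that may carry extra
    prose before or after the JSON, mirroring the split/rsplit hack above.

    Raises IndexError if the text contains no '{' at all.
    """
    head = text.split("{", 1)[1]    # drop everything before the first '{'
    body = head.rsplit("}", 1)[0]   # drop everything after the last '}'
    return "{" + body + "}"
```

Note the limitation: because it anchors on the *last* `}`, any stray `}` in trailing prose would be swallowed into the extracted span; the retry loop in `ask` is the backstop for that case.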
3 changes: 3 additions & 0 deletions src/microbots/llm/openai_api.py
@@ -36,8 +36,11 @@ def ask(self, message) -> LLMAskResponse:
model=self.deployment_name,
input=self.messages,
)
self.messages.append({"role": "assistant", "content": response.output_text})
valid, askResponse = self._validate_llm_response(response=response.output_text)

# Remove last assistant message and replace with structured response
self.messages.pop()
self.messages.append({"role": "assistant", "content": json.dumps(asdict(askResponse))})

return askResponse
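Both backends now share the same bookkeeping: append the raw assistant reply, validate it, then swap the raw text for its structured JSON form so the history stays machine-parseable. The pattern in isolation (the function name is illustrative):

```python
import json


def canonicalize_last_assistant(messages: list[dict], parsed: dict) -> None:
    """Replace the last (raw) assistant message with its structured JSON
    form, matching the pop/append sequence in both backends above."""
    assert messages and messages[-1]["role"] == "assistant"
    messages.pop()
    messages.append({"role": "assistant", "content": json.dumps(parsed)})
```

Keeping only the canonical JSON in the history means a later turn never re-feeds the model its own malformed text.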