microsoft · KavyaSree2610 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml
@@ -1,6 +1,7 @@
 name: Build and Push to Docker Hub
 
 on:
+  workflow_dispatch:
   push:
     paths:
       - 'src/microbots/environment/local_docker/image_builder/Dockerfile'

diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 from dataclasses import asdict
 from logging import getLogger
 
@@ -47,13 +48,23 @@ def ask(self, message) -> LLMAskResponse:
 
             # Extract text content from response
             response_text = response.content[0].text if response.content else ""
-            logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500])
+            logger.debug("Raw Anthropic response length: %d chars", len(response_text))
+            logger.debug("Raw Anthropic response (first 1000 chars): %s", response_text[:1000])
+
+            # Log any control characters found in the response
+            control_chars = [f"\\x{ord(c):02x}" for c in response_text if ord(c) < 0x20 and c not in '\n']
+            if control_chars:
+                logger.debug("Control characters found in response: %s", control_chars[:20])  # Log first 20
 
             # Try to extract JSON if wrapped in markdown code blocks
-            import re
             json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
             if json_match:
+                logger.debug("JSON extracted from markdown code block")
                 response_text = json_match.group(1)
+            else:
+                logger.debug("No markdown code block found, using raw response")
+
+            logger.debug("Response text to validate (first 500 chars): %s", repr(response_text[:500]))
 
             valid, askResponse = self._validate_llm_response(response=response_text)
 

diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py
@@ -6,6 +6,53 @@
 logger = getLogger(__name__)
 
 
+def _escape_control_chars(s: str) -> str:
+    """Escape control characters and fix invalid escape sequences for JSON parsing.
+
+    JSON spec issues that LLMs commonly produce:
+    1. Unescaped control characters (0x00-0x1F) inside strings - tabs, etc.
+    2. Invalid escape sequences like \\& (backslash followed by non-escape char)
+
+    We keep newlines as-is since they're part of JSON structure (between fields).
+    """
+    # Valid JSON escape sequences (after the backslash)
+    valid_json_escapes = set('"\\/bfnrtu')
+
+    result = []
+    i = 0
+    while i < len(s):
+        char = s[i]
+        code = ord(char)
+
+        if code < 0x20 and char != '\n':  # Control char but not newline
+            if char == '\t':
+                result.append('\\t')
+            elif char == '\r':
+                result.append('\\r')
+            else:
+                # Escape other control chars as Unicode
+                result.append(f'\\u{code:04x}')
+        elif char == '\\' and i + 1 < len(s):
+            # Check if this backslash is part of a valid JSON escape
+            next_char = s[i + 1]
+            if next_char in valid_json_escapes:
+                # Valid escape sequence, keep both chars and skip ahead
+                result.append(char)
+                result.append(next_char)
+                i += 2
+                continue
+            else:
+                # Invalid escape sequence - double the backslash
+                result.append('\\\\')
+        elif char == '\\':
+            # Trailing backslash with no next char - double it
+            result.append('\\\\')
+        else:
+            result.append(char)
+        i += 1
+    return ''.join(result)
+
+
 llm_output_format_str = """
 {
     "task_done": <bool>,  // Indicates if the task is completed
@@ -36,10 +83,17 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
             raise Exception("LLM is not responding in expected format. Maximum retries reached.")
 
         try:
-            response_dict = json.loads(response)
-        except json.JSONDecodeError:
+            logger.debug("Attempting to parse JSON response (length: %d)", len(response))
+            # Sanitize control characters that may appear unescaped in JSON strings
+            # JSON spec forbids unescaped control chars (0x00-0x1F) inside strings
+            # LLMs sometimes output literal tabs/etc in command strings
+            sanitized_response = _escape_control_chars(response)
+            response_dict = json.loads(sanitized_response)
+            logger.debug("JSON parsed successfully")
+        except json.JSONDecodeError as e:
             self.retries += 1
-            logger.warning("LLM response is not valid JSON. Retrying... (%d/%d)", self.retries, self.max_retries)
+            logger.warning("LLM response is not valid JSON. Error: %s. Retrying... (%d/%d)", str(e), self.retries, self.max_retries)
+            logger.debug("Failed response (repr, first 500 chars): %s", repr(response[:500]) if response else "empty")
             self.messages.append({"role": "user", "content": "LLM_RES_ERROR: Please respond in the correct JSON format.\n" + llm_output_format_str})
             return False, None
 

diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py
@@ -467,6 +467,60 @@ def test_multiple_ask_calls_append_messages(self):
         assert user_messages[1]["content"] == "Second question"
         assert user_messages[2]["content"] == "Third question"
 
+    def test_json_with_literal_tab_characters_in_command(self):
+        """Test that responses with literal tab characters in JSON strings are handled.
+
+        This reproduces the issue where LLM returns sed commands with literal tabs:
+        "command": "sed -i '182a\\			pm8001_ha->phy[phy_id]..."
+
+        Those literal tabs (0x09) are invalid inside JSON strings and cause JSONDecodeError.
+        """
+        system_prompt = "You are a helpful assistant"
+        api = AnthropicApi(system_prompt=system_prompt)
+
+        # Mock the Anthropic client response with literal tab characters
+        # This simulates the actual failure scenario from the logs
+        mock_response = Mock()
+        mock_content = Mock()
+        # This string contains LITERAL tab characters (not \t escape sequences)
+        # The tabs appear after the backslash in the sed command
+        json_with_tabs = '{"task_done": false, "thoughts": "Adding line with sed", "command": "sed -i \'182a\\			pm8001_ha->phy[phy_id].enable_completion = \\&completion;\' file.c"}'
+        mock_content.text = json_with_tabs
+        mock_response.content = [mock_content]
+        api.ai_client.messages.create = Mock(return_value=mock_response)
+
+        result = api.ask("Add a line to the file")
+        assert isinstance(result, LLMAskResponse)
+        assert "sed" in result.command
+
+    def test_json_with_various_control_characters(self):
+        """Test handling of various control characters that may appear in LLM responses."""
+        system_prompt = "You are a helpful assistant"
+        api = AnthropicApi(system_prompt=system_prompt)
+
+        # Test cases with different control characters
+        test_cases = [
+            # (description, json_string_with_control_char)
+            ("literal tab in command", 
+             '{"task_done": false, "thoughts": "test", "command": "echo\thello"}'),
+            ("literal carriage return", 
+             '{"task_done": false, "thoughts": "test", "command": "echo\rhello"}'),
+            ("multiple tabs", 
+             '{"task_done": false, "thoughts": "indented\t\t\tcode", "command": "pwd"}'),
+        ]
+
+        for desc, json_with_ctrl_char in test_cases:
+            mock_response = Mock()
+            mock_content = Mock()
+            mock_content.text = json_with_ctrl_char
+            mock_response.content = [mock_content]
+            api.ai_client.messages.create = Mock(return_value=mock_response)
+            api.retries = 0  # Reset retries
+            api.messages = []  # Clear messages
+
+            result = api.ask("test")
+            assert isinstance(result, LLMAskResponse), f"Failed for case: {desc}"
+
 
 @pytest.mark.anthropic_integration
 class TestAnthropicApiIntegration:

diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py
@@ -9,7 +9,7 @@
 # Add src to path for imports
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))
 
-from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str
+from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str, _escape_control_chars
 
 
 class ConcreteLLM(LLMInterface):
@@ -742,4 +742,74 @@ def test_task_done_true_with_not_none_command_field(self, llm):
         assert llm_response is None
         assert llm.retries == 1
         assert len(llm.messages) == 1
-        assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
+        assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
+
+
+@pytest.mark.unit
+class TestEscapeControlChars:
+    """Tests for _escape_control_chars helper function"""
+
+    def test_literal_tab_escaped(self):
+        """Literal tab (0x09) should be escaped to \\t"""
+        s = '{"command": "echo\thello"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["command"] == "echo\thello"
+
+    def test_literal_carriage_return_escaped(self):
+        """Literal CR (0x0D) should be escaped to \\r"""
+        s = '{"command": "echo\rhello"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["command"] == "echo\rhello"
+
+    def test_other_control_chars_escaped_as_unicode(self):
+        """Control chars other than tab/CR should be escaped as \\uXXXX"""
+        # Bell character (0x07)
+        s = '{"command": "echo' + chr(0x07) + 'hello"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["command"] == "echo\x07hello"
+
+    def test_invalid_escape_sequence_doubled(self):
+        """Backslash followed by invalid escape char should be double-escaped"""
+        # \& is not a valid JSON escape
+        s = '{"command": "echo ' + chr(92) + '& hello"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["command"] == "echo \\& hello"
+
+    def test_valid_escape_sequences_preserved(self):
+        """Valid JSON escape sequences (\\n, \\\\, \\\") should be left intact"""
+        s = '{"thoughts": "line1\\nline2", "command": "echo\\\\done"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["thoughts"] == "line1\nline2"
+        assert parsed["command"] == "echo\\done"
+
+    def test_no_control_chars_passthrough(self):
+        """Strings with no control chars or bad escapes pass through unchanged"""
+        s = '{"task_done": false, "thoughts": "all good", "command": "ls -la"}'
+        result = _escape_control_chars(s)
+        assert result == s
+        parsed = json.loads(result)
+        assert parsed["command"] == "ls -la"
+
+    def test_newlines_preserved(self):
+        """Newlines should be kept as-is since they are JSON structural separators"""
+        s = '{\n"command": "pwd"\n}'
+        result = _escape_control_chars(s)
+        assert '\n' in result
+
+    def test_multiple_control_chars_in_one_string(self):
+        """Multiple different control chars should all be escaped"""
+        s = '{"thoughts": "indented\t\t\tcode", "command": "pwd"}'
+        result = _escape_control_chars(s)
+        parsed = json.loads(result)
+        assert parsed["thoughts"] == "indented\t\t\tcode"
+
+    def test_trailing_lone_backslash(self):
+        """A lone backslash at the end of the string should be double-escaped"""
+        s = 'test' + chr(92)
+        result = _escape_control_chars(s)
+        assert result.endswith('\\\\')