Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/dockerBuildPush.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Build and Push to Docker Hub

on:
workflow_dispatch:
push:
paths:
- 'src/microbots/environment/local_docker/image_builder/Dockerfile'
Expand Down
15 changes: 13 additions & 2 deletions src/microbots/llm/anthropic_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import re
from dataclasses import asdict
from logging import getLogger

Expand Down Expand Up @@ -47,13 +48,23 @@ def ask(self, message) -> LLMAskResponse:

# Extract text content from response
response_text = response.content[0].text if response.content else ""
logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500])
logger.debug("Raw Anthropic response length: %d chars", len(response_text))
logger.debug("Raw Anthropic response (first 1000 chars): %s", response_text[:1000])

# Log any control characters found in the response
control_chars = [f"\\x{ord(c):02x}" for c in response_text if ord(c) < 0x20 and c not in '\n']
if control_chars:
logger.debug("Control characters found in response: %s", control_chars[:20]) # Log first 20

# Try to extract JSON if wrapped in markdown code blocks
import re
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
if json_match:
logger.debug("JSON extracted from markdown code block")
response_text = json_match.group(1)
else:
logger.debug("No markdown code block found, using raw response")

logger.debug("Response text to validate (first 500 chars): %s", repr(response_text[:500]))

valid, askResponse = self._validate_llm_response(response=response_text)

Expand Down
60 changes: 57 additions & 3 deletions src/microbots/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@
logger = getLogger(__name__)


def _escape_control_chars(s: str) -> str:
"""Escape control characters and fix invalid escape sequences for JSON parsing.

JSON spec issues that LLMs commonly produce:
1. Unescaped control characters (0x00-0x1F) inside strings - tabs, etc.
2. Invalid escape sequences like \\& (backslash followed by non-escape char)

We keep newlines as-is since they're part of JSON structure (between fields).
"""
# Valid JSON escape sequences (after the backslash)
valid_json_escapes = set('"\\/bfnrtu')

result = []
i = 0
while i < len(s):
char = s[i]
code = ord(char)

if code < 0x20 and char != '\n': # Control char but not newline
if char == '\t':
result.append('\\t')
elif char == '\r':
result.append('\\r')
else:
# Escape other control chars as Unicode
result.append(f'\\u{code:04x}')
elif char == '\\' and i + 1 < len(s):
# Check if this backslash is part of a valid JSON escape
next_char = s[i + 1]
if next_char in valid_json_escapes:
# Valid escape sequence, keep both chars and skip ahead
result.append(char)
result.append(next_char)
i += 2
continue
else:
# Invalid escape sequence - double the backslash
result.append('\\\\')
elif char == '\\':
# Trailing backslash with no next char - double it
result.append('\\\\')
else:
result.append(char)
i += 1
return ''.join(result)


llm_output_format_str = """
{
"task_done": <bool>, // Indicates if the task is completed
Expand Down Expand Up @@ -36,10 +83,17 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]:
raise Exception("LLM is not responding in expected format. Maximum retries reached.")

try:
response_dict = json.loads(response)
except json.JSONDecodeError:
logger.debug("Attempting to parse JSON response (length: %d)", len(response))
# Sanitize control characters that may appear unescaped in JSON strings
# JSON spec forbids unescaped control chars (0x00-0x1F) inside strings
# LLMs sometimes output literal tabs/etc in command strings
sanitized_response = _escape_control_chars(response)
response_dict = json.loads(sanitized_response)
logger.debug("JSON parsed successfully")
except json.JSONDecodeError as e:
self.retries += 1
logger.warning("LLM response is not valid JSON. Retrying... (%d/%d)", self.retries, self.max_retries)
logger.warning("LLM response is not valid JSON. Error: %s. Retrying... (%d/%d)", str(e), self.retries, self.max_retries)
logger.debug("Failed response (repr, first 500 chars): %s", repr(response[:500]) if response else "empty")
self.messages.append({"role": "user", "content": "LLM_RES_ERROR: Please respond in the correct JSON format.\n" + llm_output_format_str})
return False, None

Expand Down
54 changes: 54 additions & 0 deletions test/llm/test_anthropic_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,60 @@ def test_multiple_ask_calls_append_messages(self):
assert user_messages[1]["content"] == "Second question"
assert user_messages[2]["content"] == "Third question"

def test_json_with_literal_tab_characters_in_command(self):
"""Test that responses with literal tab characters in JSON strings are handled.

This reproduces the issue where LLM returns sed commands with literal tabs:
"command": "sed -i '182a\\ pm8001_ha->phy[phy_id]..."

Those literal tabs (0x09) are invalid inside JSON strings and cause JSONDecodeError.
"""
system_prompt = "You are a helpful assistant"
api = AnthropicApi(system_prompt=system_prompt)

# Mock the Anthropic client response with literal tab characters
# This simulates the actual failure scenario from the logs
mock_response = Mock()
mock_content = Mock()
# This string contains LITERAL tab characters (not \t escape sequences)
# The tabs appear after the backslash in the sed command
json_with_tabs = '{"task_done": false, "thoughts": "Adding line with sed", "command": "sed -i \'182a\\ pm8001_ha->phy[phy_id].enable_completion = \\&completion;\' file.c"}'
mock_content.text = json_with_tabs
mock_response.content = [mock_content]
api.ai_client.messages.create = Mock(return_value=mock_response)

result = api.ask("Add a line to the file")
assert isinstance(result, LLMAskResponse)
assert "sed" in result.command

def test_json_with_various_control_characters(self):
"""Test handling of various control characters that may appear in LLM responses."""
system_prompt = "You are a helpful assistant"
api = AnthropicApi(system_prompt=system_prompt)

# Test cases with different control characters
test_cases = [
# (description, json_string_with_control_char)
("literal tab in command",
'{"task_done": false, "thoughts": "test", "command": "echo\thello"}'),
("literal carriage return",
'{"task_done": false, "thoughts": "test", "command": "echo\rhello"}'),
("multiple tabs",
'{"task_done": false, "thoughts": "indented\t\t\tcode", "command": "pwd"}'),
]

for desc, json_with_ctrl_char in test_cases:
mock_response = Mock()
mock_content = Mock()
mock_content.text = json_with_ctrl_char
mock_response.content = [mock_content]
api.ai_client.messages.create = Mock(return_value=mock_response)
api.retries = 0 # Reset retries
api.messages = [] # Clear messages

result = api.ask("test")
assert isinstance(result, LLMAskResponse), f"Failed for case: {desc}"


@pytest.mark.anthropic_integration
class TestAnthropicApiIntegration:
Expand Down
74 changes: 72 additions & 2 deletions test/llm/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Add src to path for imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")))

from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str
from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str, _escape_control_chars


class ConcreteLLM(LLMInterface):
Expand Down Expand Up @@ -742,4 +742,74 @@ def test_task_done_true_with_not_none_command_field(self, llm):
assert llm_response is None
assert llm.retries == 1
assert len(llm.messages) == 1
assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]
assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"]


@pytest.mark.unit
class TestEscapeControlChars:
"""Tests for _escape_control_chars helper function"""

def test_literal_tab_escaped(self):
"""Literal tab (0x09) should be escaped to \\t"""
s = '{"command": "echo\thello"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["command"] == "echo\thello"

def test_literal_carriage_return_escaped(self):
"""Literal CR (0x0D) should be escaped to \\r"""
s = '{"command": "echo\rhello"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["command"] == "echo\rhello"

def test_other_control_chars_escaped_as_unicode(self):
"""Control chars other than tab/CR should be escaped as \\uXXXX"""
# Bell character (0x07)
s = '{"command": "echo' + chr(0x07) + 'hello"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["command"] == "echo\x07hello"

def test_invalid_escape_sequence_doubled(self):
"""Backslash followed by invalid escape char should be double-escaped"""
# \& is not a valid JSON escape
s = '{"command": "echo ' + chr(92) + '& hello"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["command"] == "echo \\& hello"

def test_valid_escape_sequences_preserved(self):
"""Valid JSON escape sequences (\\n, \\\\, \\\") should be left intact"""
s = '{"thoughts": "line1\\nline2", "command": "echo\\\\done"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["thoughts"] == "line1\nline2"
assert parsed["command"] == "echo\\done"

def test_no_control_chars_passthrough(self):
"""Strings with no control chars or bad escapes pass through unchanged"""
s = '{"task_done": false, "thoughts": "all good", "command": "ls -la"}'
result = _escape_control_chars(s)
assert result == s
parsed = json.loads(result)
assert parsed["command"] == "ls -la"

def test_newlines_preserved(self):
"""Newlines should be kept as-is since they are JSON structural separators"""
s = '{\n"command": "pwd"\n}'
result = _escape_control_chars(s)
assert '\n' in result

def test_multiple_control_chars_in_one_string(self):
"""Multiple different control chars should all be escaped"""
s = '{"thoughts": "indented\t\t\tcode", "command": "pwd"}'
result = _escape_control_chars(s)
parsed = json.loads(result)
assert parsed["thoughts"] == "indented\t\t\tcode"

def test_trailing_lone_backslash(self):
"""A lone backslash at the end of the string should be double-escaped"""
s = 'test' + chr(92)
result = _escape_control_chars(s)
assert result.endswith('\\\\')
Loading