diff --git a/.github/workflows/dockerBuildPush.yml b/.github/workflows/dockerBuildPush.yml index 08341d0..ae10709 100644 --- a/.github/workflows/dockerBuildPush.yml +++ b/.github/workflows/dockerBuildPush.yml @@ -1,6 +1,7 @@ name: Build and Push to Docker Hub on: + workflow_dispatch: push: paths: - 'src/microbots/environment/local_docker/image_builder/Dockerfile' diff --git a/src/microbots/llm/anthropic_api.py b/src/microbots/llm/anthropic_api.py index f40118a..d49e066 100644 --- a/src/microbots/llm/anthropic_api.py +++ b/src/microbots/llm/anthropic_api.py @@ -1,5 +1,6 @@ import json import os +import re from dataclasses import asdict from logging import getLogger @@ -47,13 +48,23 @@ def ask(self, message) -> LLMAskResponse: # Extract text content from response response_text = response.content[0].text if response.content else "" - logger.debug("Raw Anthropic response (first 500 chars): %s", response_text[:500]) + logger.debug("Raw Anthropic response length: %d chars", len(response_text)) + logger.debug("Raw Anthropic response (first 1000 chars): %s", response_text[:1000]) + + # Log any control characters found in the response + control_chars = [f"\\x{ord(c):02x}" for c in response_text if ord(c) < 0x20 and c not in '\n'] + if control_chars: + logger.debug("Control characters found in response: %s", control_chars[:20]) # Log first 20 # Try to extract JSON if wrapped in markdown code blocks - import re json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL) if json_match: + logger.debug("JSON extracted from markdown code block") response_text = json_match.group(1) + else: + logger.debug("No markdown code block found, using raw response") + + logger.debug("Response text to validate (first 500 chars): %s", repr(response_text[:500])) valid, askResponse = self._validate_llm_response(response=response_text) diff --git a/src/microbots/llm/llm.py b/src/microbots/llm/llm.py index 2800790..14ca821 100644 --- a/src/microbots/llm/llm.py +++ b/src/microbots/llm/llm.py @@ -6,6 +6,53 @@ logger = getLogger(__name__) +def _escape_control_chars(s: str) -> str: + """Escape control characters and fix invalid escape sequences for JSON parsing. + + JSON spec issues that LLMs commonly produce: + 1. Unescaped control characters (0x00-0x1F) inside strings - tabs, etc. + 2. Invalid escape sequences like \\& (backslash followed by non-escape char) + + We keep newlines as-is since they're part of JSON structure (between fields). + """ + # Valid JSON escape sequences (after the backslash) + valid_json_escapes = set('"\\/bfnrtu') + + result = [] + i = 0 + while i < len(s): + char = s[i] + code = ord(char) + + if code < 0x20 and char != '\n': # Control char but not newline + if char == '\t': + result.append('\\t') + elif char == '\r': + result.append('\\r') + else: + # Escape other control chars as Unicode + result.append(f'\\u{code:04x}') + elif char == '\\' and i + 1 < len(s): + # Check if this backslash is part of a valid JSON escape + next_char = s[i + 1] + if next_char in valid_json_escapes: + # Valid escape sequence, keep both chars and skip ahead + result.append(char) + result.append(next_char) + i += 2 + continue + else: + # Invalid escape sequence - double the backslash + result.append('\\\\') + elif char == '\\': + # Trailing backslash with no next char - double it + result.append('\\\\') + else: + result.append(char) + i += 1 + return ''.join(result) + + llm_output_format_str = """ { "task_done": , // Indicates if the task is completed @@ -36,10 +83,17 @@ def _validate_llm_response(self, response: str) -> tuple[bool, LLMAskResponse]: raise Exception("LLM is not responding in expected format. Maximum retries reached.") try: - response_dict = json.loads(response) - except json.JSONDecodeError: + logger.debug("Attempting to parse JSON response (length: %d)", len(response)) + # Sanitize control characters that may appear unescaped in JSON strings + # JSON spec forbids unescaped control chars (0x00-0x1F) inside strings + # LLMs sometimes output literal tabs/etc in command strings + sanitized_response = _escape_control_chars(response) + response_dict = json.loads(sanitized_response) + logger.debug("JSON parsed successfully") + except json.JSONDecodeError as e: self.retries += 1 - logger.warning("LLM response is not valid JSON. Retrying... (%d/%d)", self.retries, self.max_retries) + logger.warning("LLM response is not valid JSON. Error: %s. Retrying... (%d/%d)", str(e), self.retries, self.max_retries) + logger.debug("Failed response (repr, first 500 chars): %s", repr(response[:500]) if response else "empty") self.messages.append({"role": "user", "content": "LLM_RES_ERROR: Please respond in the correct JSON format.\n" + llm_output_format_str}) return False, None diff --git a/test/llm/test_anthropic_api.py b/test/llm/test_anthropic_api.py index 674294c..285fcf3 100644 --- a/test/llm/test_anthropic_api.py +++ b/test/llm/test_anthropic_api.py @@ -467,6 +467,60 @@ def test_multiple_ask_calls_append_messages(self): assert user_messages[1]["content"] == "Second question" assert user_messages[2]["content"] == "Third question" + def test_json_with_literal_tab_characters_in_command(self): + """Test that responses with literal tab characters in JSON strings are handled. + + This reproduces the issue where LLM returns sed commands with literal tabs: + "command": "sed -i '182a\\ pm8001_ha->phy[phy_id]..." + + Those literal tabs (0x09) are invalid inside JSON strings and cause JSONDecodeError. + """ + system_prompt = "You are a helpful assistant" + api = AnthropicApi(system_prompt=system_prompt) + + # Mock the Anthropic client response with literal tab characters + # This simulates the actual failure scenario from the logs + mock_response = Mock() + mock_content = Mock() + # This string contains LITERAL tab characters (not \t escape sequences) + # The tabs appear after the backslash in the sed command + json_with_tabs = '{"task_done": false, "thoughts": "Adding line with sed", "command": "sed -i \'182a\\ pm8001_ha->phy[phy_id].enable_completion = \\&completion;\' file.c"}' + mock_content.text = json_with_tabs + mock_response.content = [mock_content] + api.ai_client.messages.create = Mock(return_value=mock_response) + + result = api.ask("Add a line to the file") + assert isinstance(result, LLMAskResponse) + assert "sed" in result.command + + def test_json_with_various_control_characters(self): + """Test handling of various control characters that may appear in LLM responses.""" + system_prompt = "You are a helpful assistant" + api = AnthropicApi(system_prompt=system_prompt) + + # Test cases with different control characters + test_cases = [ + # (description, json_string_with_control_char) + ("literal tab in command", + '{"task_done": false, "thoughts": "test", "command": "echo\thello"}'), + ("literal carriage return", + '{"task_done": false, "thoughts": "test", "command": "echo\rhello"}'), + ("multiple tabs", + '{"task_done": false, "thoughts": "indented\t\t\tcode", "command": "pwd"}'), + ] + + for desc, json_with_ctrl_char in test_cases: + mock_response = Mock() + mock_content = Mock() + mock_content.text = json_with_ctrl_char + mock_response.content = [mock_content] + api.ai_client.messages.create = Mock(return_value=mock_response) + api.retries = 0 # Reset retries + api.messages = [] # Clear messages + + result = api.ask("test") + assert isinstance(result, LLMAskResponse), f"Failed for case: {desc}" + @pytest.mark.anthropic_integration class TestAnthropicApiIntegration: diff --git a/test/llm/test_llm.py b/test/llm/test_llm.py index bd1b82d..341cdbf 100644 --- a/test/llm/test_llm.py +++ b/test/llm/test_llm.py @@ -9,7 +9,7 @@ # Add src to path for imports sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src"))) -from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str +from microbots.llm.llm import LLMInterface, LLMAskResponse, llm_output_format_str, _escape_control_chars class ConcreteLLM(LLMInterface): @@ -742,4 +742,74 @@ def test_task_done_true_with_not_none_command_field(self, llm): assert llm_response is None assert llm.retries == 1 assert len(llm.messages) == 1 - assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"] \ No newline at end of file + assert "When 'task_done' is true, 'command' should be an empty string." in llm.messages[0]["content"] + + +@pytest.mark.unit +class TestEscapeControlChars: + """Tests for _escape_control_chars helper function""" + + def test_literal_tab_escaped(self): + """Literal tab (0x09) should be escaped to \\t""" + s = '{"command": "echo\thello"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["command"] == "echo\thello" + + def test_literal_carriage_return_escaped(self): + """Literal CR (0x0D) should be escaped to \\r""" + s = '{"command": "echo\rhello"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["command"] == "echo\rhello" + + def test_other_control_chars_escaped_as_unicode(self): + """Control chars other than tab/CR should be escaped as \\uXXXX""" + # Bell character (0x07) + s = '{"command": "echo' + chr(0x07) + 'hello"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["command"] == "echo\x07hello" + + def test_invalid_escape_sequence_doubled(self): + """Backslash followed by invalid escape char should be double-escaped""" + # \& is not a valid JSON escape + s = '{"command": "echo ' + chr(92) + '& hello"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["command"] == "echo \\& hello" + + def test_valid_escape_sequences_preserved(self): + """Valid JSON escape sequences (\\n, \\\\, \\\") should be left intact""" + s = '{"thoughts": "line1\\nline2", "command": "echo\\\\done"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["thoughts"] == "line1\nline2" + assert parsed["command"] == "echo\\done" + + def test_no_control_chars_passthrough(self): + """Strings with no control chars or bad escapes pass through unchanged""" + s = '{"task_done": false, "thoughts": "all good", "command": "ls -la"}' + result = _escape_control_chars(s) + assert result == s + parsed = json.loads(result) + assert parsed["command"] == "ls -la" + + def test_newlines_preserved(self): + """Newlines should be kept as-is since they are JSON structural separators""" + s = '{\n"command": "pwd"\n}' + result = _escape_control_chars(s) + assert '\n' in result + + def test_multiple_control_chars_in_one_string(self): + """Multiple different control chars should all be escaped""" + s = '{"thoughts": "indented\t\t\tcode", "command": "pwd"}' + result = _escape_control_chars(s) + parsed = json.loads(result) + assert parsed["thoughts"] == "indented\t\t\tcode" + + def test_trailing_lone_backslash(self): + """A lone backslash at the end of the string should be double-escaped""" + s = 'test' + chr(92) + result = _escape_control_chars(s) + assert result.endswith('\\\\') \ No newline at end of file