From 570fba25a5552f9949ae0733ea2f7860e3f9277c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 23:20:36 +0000 Subject: [PATCH 1/4] Initial plan From 2aabedf7b81f142ab827ce9cfe3ec995d2aadcb6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 23:24:46 +0000 Subject: [PATCH 2/4] fix: recover from truncated LLM JSON responses --- qa_agent/ai_planner.py | 55 ++++++++++++++++++++++++++++++++++++++++ tests/test_ai_planner.py | 12 +++++++++ 2 files changed, 67 insertions(+) diff --git a/qa_agent/ai_planner.py b/qa_agent/ai_planner.py index 184d14e..c560112 100644 --- a/qa_agent/ai_planner.py +++ b/qa_agent/ai_planner.py @@ -255,6 +255,52 @@ "visible", "hidden", "text_contains", "url_contains", "element_count" }) + +def _repair_truncated_json(text: str) -> str | None: + """Attempt to repair truncated JSON by closing open strings/containers.""" + if not text.strip().startswith("{"): + return None + + stack: list[str] = [] + in_string = False + escaped = False + + for ch in text: + if in_string: + if escaped: + escaped = False + continue + if ch == "\\": + escaped = True + elif ch == '"': + in_string = False + continue + + if ch == '"': + in_string = True + elif ch in "{[": + stack.append(ch) + elif ch in "}]": + if not stack: + return None + opener = stack.pop() + if (opener == "{" and ch != "}") or (opener == "[" and ch != "]"): + return None + + if not in_string and not stack: + return None + + repaired = text + if in_string: + if escaped: + repaired += "\\" + repaired += '"' + for opener in reversed(stack): + repaired += "}" if opener == "{" else "]" + + return repaired + + def validate_plan(plan: "TestPlan") -> list[str]: """Return rule-based reliability warnings for a generated TestPlan. @@ -420,6 +466,15 @@ def _parse_json(self, text: str) -> dict: try: data = json.loads(stripped) except json.JSONDecodeError as exc: + repaired = _repair_truncated_json(stripped) + if repaired is not None: + try: + data = json.loads(repaired) + except json.JSONDecodeError: + data = None + else: + if isinstance(data, dict): + return data preview = text[:_MAX_RAW_RESPONSE_IN_ERROR] suffix = "…" if len(text) > _MAX_RAW_RESPONSE_IN_ERROR else "" raise ValueError( diff --git a/tests/test_ai_planner.py b/tests/test_ai_planner.py index 02454ba..dac3239 100644 --- a/tests/test_ai_planner.py +++ b/tests/test_ai_planner.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os from unittest.mock import MagicMock, patch @@ -89,6 +90,17 @@ def test_malformed_json_raises_value_error(self): with pytest.raises(ValueError, match="invalid JSON"): planner.plan("test", "https://example.com") + def test_truncated_json_warning_string_is_repaired(self): + data = json.loads(VALID_PLAN_JSON) + data["warnings"] = [ + "CSS checks cannot be verified via Playwright computed-style assertions; use visual regression." + ] + truncated = json.dumps(data)[:-3] # chop trailing quote/bracket/brace + planner = self._planner(truncated) + plan = planner.plan("test", "https://example.com") + assert plan.warnings + assert "computed-style assertions" in plan.warnings[0] + def test_no_text_content_raises_llm_error(self): from qa_agent.llm_client import LLMError client = MagicMock() From e098675ea0b2eb9e361b335b380aba13677d10c6 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 2 Jun 2026 23:47:02 +0000 Subject: [PATCH 3/4] fix: address code review feedback - escape bug, logging, and comprehensive tests - Fix escape sequence bug by returning None for mid-escape truncation - Remove dead code assignment in JSON parsing error handler - Add logging to track successful truncated JSON repairs - Add comprehensive test coverage for edge cases: * Mid-escape truncation (verifies it fails safely) * Nested structure truncation * Valid JSON (verifies repair returns None) * Single unclosed string repair * Multiple unclosed containers * Mismatched brackets rejection * Non-object JSON rejection Co-authored-by: openhands --- qa_agent/ai_planner.py | 16 +++++++-- tests/test_ai_planner.py | 77 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/qa_agent/ai_planner.py b/qa_agent/ai_planner.py index c560112..3729e0b 100644 --- a/qa_agent/ai_planner.py +++ b/qa_agent/ai_planner.py @@ -9,6 +9,7 @@ """ import json +import logging import time from .llm_client import ( @@ -255,6 +256,8 @@ "visible", "hidden", "text_contains", "url_contains", "element_count" }) +logger = logging.getLogger(__name__) + def _repair_truncated_json(text: str) -> str | None: """Attempt to repair truncated JSON by closing open strings/containers.""" @@ -290,10 +293,12 @@ def _repair_truncated_json(text: str) -> str | None: if not in_string and not stack: return None + # Can't safely repair mid-escape sequence - unclear what the intended escape was + if escaped: + return None + repaired = text if in_string: - if escaped: - repaired += "\\" repaired += '"' for opener in reversed(stack): repaired += "}" if opener == "{" else "]" @@ -471,9 +476,14 @@ def _parse_json(self, text: str) -> dict: try: data = json.loads(repaired) except json.JSONDecodeError: - data = None + pass # Repair failed, fall through to original error else: if isinstance(data, dict): + logger.warning( + "Recovered from truncated LLM response (%d chars repaired). " + "Original length: %d, repaired length: %d", + len(repaired) - len(stripped), len(stripped), len(repaired) + ) return data preview = text[:_MAX_RAW_RESPONSE_IN_ERROR] suffix = "…" if len(text) > _MAX_RAW_RESPONSE_IN_ERROR else "" diff --git a/tests/test_ai_planner.py b/tests/test_ai_planner.py index dac3239..fbdd530 100644 --- a/tests/test_ai_planner.py +++ b/tests/test_ai_planner.py @@ -101,6 +101,83 @@ def test_truncated_json_warning_string_is_repaired(self): assert plan.warnings assert "computed-style assertions" in plan.warnings[0] + def test_truncated_after_backslash_raises_error(self): + """Truncation mid-escape should fail safely - can't infer intended escape.""" + data = json.loads(VALID_PLAN_JSON) + data["notes"] = "Line 1\\nLine 2" # Contains newline escape + full_json = json.dumps(data) + # Truncate after a backslash in the middle of an escape sequence + idx = full_json.index("\\n") + truncated = full_json[:idx + 1] # Keep backslash, remove the 'n' + + planner = self._planner(truncated) + # Should raise because repair returns None for mid-escape truncation + with pytest.raises(ValueError, match="invalid JSON"): + planner.plan("test", "https://example.com") + + def test_truncated_nested_structures_repaired(self): + """Multiple unclosed containers should all be closed.""" + # Create JSON with nested structures and truncate mid-way + data = json.loads(VALID_PLAN_JSON) + data["custom_steps"][0]["actions"].append({"type": "hover", "selector": "#menu"}) + full_json = json.dumps(data) + # Truncate in the middle of the nested structure + # Find a point deep in the nesting and truncate there + truncate_at = full_json.index('"hover"') + len('"hover"') + truncated = full_json[:truncate_at] + + planner = self._planner(truncated) + # Should successfully repair by closing all open containers + plan = planner.plan("test", "https://example.com") + assert isinstance(plan, TestPlan) + # At minimum should have the summary from before truncation + assert plan.summary == "Test the login flow" + + def test_already_valid_json_returns_none_from_repair_fn(self): + """_repair_truncated_json should return None for already-valid JSON.""" + from qa_agent.ai_planner import _repair_truncated_json + assert _repair_truncated_json('{"key": "value"}') is None + assert _repair_truncated_json(VALID_PLAN_JSON) is None + + def test_repair_closes_unclosed_string(self): + """Single unclosed string should be closed.""" + partial = '{"summary": "test in progress' + from qa_agent.ai_planner import _repair_truncated_json + repaired = _repair_truncated_json(partial) + assert repaired is not None + # Should add closing quote and brace + assert repaired == partial + '"}' + # Verify it's valid JSON + parsed = json.loads(repaired) + assert parsed["summary"] == "test in progress" + + def test_repair_closes_multiple_containers(self): + """Multiple unclosed objects/arrays should all be closed.""" + partial = '{"custom_steps": [{"actions": [{"type": "click"' + from qa_agent.ai_planner import _repair_truncated_json + repaired = _repair_truncated_json(partial) + assert repaired is not None + # Should close: string, object (action), array (actions), object (step), array (steps), object (root) + assert repaired.endswith('"}]}]}') + # Verify it's valid JSON + parsed = json.loads(repaired) + assert "custom_steps" in parsed + + def test_repair_rejects_mismatched_brackets(self): + """Malformed JSON with bracket mismatches should return None.""" + from qa_agent.ai_planner import _repair_truncated_json + # Opening { but closing ] + assert _repair_truncated_json('{"key": [}') is None + # Closing without opening + assert _repair_truncated_json('{"key": "value"}}') is None + + def test_repair_rejects_non_object_json(self): + """Repair only works for truncated objects, not arrays or primitives.""" + from qa_agent.ai_planner import _repair_truncated_json + assert _repair_truncated_json('["array"') is None + assert _repair_truncated_json('"string') is None + assert _repair_truncated_json('123') is None + def test_no_text_content_raises_llm_error(self): from qa_agent.llm_client import LLMError client = MagicMock() From b1916db62574b1ffa1a0720f0557e5e223abce98 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 2 Jun 2026 23:51:29 +0000 Subject: [PATCH 4/4] fix: correct test escaping for mid-escape truncation test The test was using double-backslash which created a literal backslash character instead of a newline escape sequence. Changed to use actual newline character so json.dumps creates the \n escape sequence that can be truncated for proper testing. Co-authored-by: openhands --- tests/test_ai_planner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_ai_planner.py b/tests/test_ai_planner.py index fbdd530..494ef48 100644 --- a/tests/test_ai_planner.py +++ b/tests/test_ai_planner.py @@ -104,10 +104,10 @@ def test_truncated_json_warning_string_is_repaired(self): def test_truncated_after_backslash_raises_error(self): """Truncation mid-escape should fail safely - can't infer intended escape.""" data = json.loads(VALID_PLAN_JSON) - data["notes"] = "Line 1\\nLine 2" # Contains newline escape + data["notes"] = "Line 1\nLine 2" # Contains actual newline character full_json = json.dumps(data) - # Truncate after a backslash in the middle of an escape sequence - idx = full_json.index("\\n") + # In JSON, newline becomes \n escape sequence. Find it and truncate after backslash + idx = full_json.index("\\n") # Find the \n in the JSON string truncated = full_json[:idx + 1] # Keep backslash, remove the 'n' planner = self._planner(truncated)