Skip to content

Commit 2d7ad5f

Browse files
authored
Merge branch 'main' into fix/951-react-agent-tool-execution-error-handling
2 parents 7f2a1ed + a79da27 commit 2d7ad5f

2 files changed

Lines changed: 358 additions & 0 deletions

File tree

agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/AutoContextMemory.java

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,99 @@ private boolean offloadingLargePayload(List<Msg> rawMessages, boolean lastKeep)
12281228
Msg msg = rawMessages.get(i);
12291229
String textContent = msg.getTextContent();
12301230

1231+
// ASSISTANT messages with ToolUseBlock (tool_calls) must NOT be offloaded as a plain
1232+
// text stub. Doing so strips the ToolUseBlock, leaving the subsequent TOOL result
1233+
// messages without a preceding tool_calls assistant message, which violates the API
1234+
// constraint: "messages with role 'tool' must be a response to a preceding message
1235+
// with 'tool_calls'". These pairs are handled exclusively by Strategy 1.
1236+
if (MsgUtils.isToolUseMessage(msg)) {
1237+
continue;
1238+
}
1239+
1240+
// TOOL result messages can have their output content offloaded, but the
1241+
// ToolResultBlock structure (id, name) MUST be preserved so that the API formatter
1242+
// can still emit the correct tool_call_id / name fields. We handle them separately.
1243+
if (MsgUtils.isToolResultMessage(msg)) {
1244+
ToolResultBlock originalResult = msg.getFirstContentBlock(ToolResultBlock.class);
1245+
if (originalResult != null) {
1246+
// Use the ToolResultBlock output text for size checking, because
1247+
// Msg.getTextContent() only extracts top-level TextBlocks and returns
1248+
// empty string for TOOL messages whose content is a ToolResultBlock.
1249+
String outputText =
1250+
originalResult.getOutput().stream()
1251+
.filter(TextBlock.class::isInstance)
1252+
.map(TextBlock.class::cast)
1253+
.map(TextBlock::getText)
1254+
.collect(Collectors.joining("\n"));
1255+
if (outputText.length() > threshold) {
1256+
String toolResultUuid = UUID.randomUUID().toString();
1257+
List<Msg> offloadMsg = new ArrayList<>();
1258+
offloadMsg.add(msg);
1259+
offload(toolResultUuid, offloadMsg);
1260+
log.info(
1261+
"Offloaded large tool result message: index={}, size={} chars,"
1262+
+ " uuid={}",
1263+
i,
1264+
outputText.length(),
1265+
toolResultUuid);
1266+
1267+
String preview =
1268+
outputText.length() > autoContextConfig.offloadSinglePreview
1269+
? outputText.substring(
1270+
0, autoContextConfig.offloadSinglePreview)
1271+
+ "..."
1272+
: outputText;
1273+
String offloadHint =
1274+
preview
1275+
+ "\n"
1276+
+ String.format(
1277+
Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, toolResultUuid);
1278+
1279+
// Preserve ToolResultBlock structure (id, name, metadata) so the API
1280+
// formatter can emit the correct tool_call_id / name, and downstream
1281+
// consumers retain semantic flags (e.g. agentscope_suspended) after
1282+
// offloading. Only the output text is replaced with the offload hint.
1283+
ToolResultBlock compressedResult =
1284+
ToolResultBlock.of(
1285+
originalResult.getId(),
1286+
originalResult.getName(),
1287+
TextBlock.builder().text(offloadHint).build(),
1288+
originalResult.getMetadata());
1289+
1290+
Map<String, Object> trCompressMeta = new HashMap<>();
1291+
trCompressMeta.put("offloaduuid", toolResultUuid);
1292+
Map<String, Object> trMetadata = new HashMap<>();
1293+
trMetadata.put("_compress_meta", trCompressMeta);
1294+
1295+
Msg replacementToolMsg =
1296+
Msg.builder()
1297+
.role(msg.getRole())
1298+
.name(msg.getName())
1299+
.content(compressedResult)
1300+
.metadata(trMetadata)
1301+
.build();
1302+
1303+
int tokenBefore = TokenCounterUtil.calculateToken(List.of(msg));
1304+
int tokenAfter =
1305+
TokenCounterUtil.calculateToken(List.of(replacementToolMsg));
1306+
Map<String, Object> trEventMetadata = new HashMap<>();
1307+
trEventMetadata.put("inputToken", tokenBefore);
1308+
trEventMetadata.put("outputToken", tokenAfter);
1309+
trEventMetadata.put("time", 0.0);
1310+
1311+
String eventType =
1312+
lastKeep
1313+
? CompressionEvent.LARGE_MESSAGE_OFFLOAD_WITH_PROTECTION
1314+
: CompressionEvent.LARGE_MESSAGE_OFFLOAD;
1315+
recordCompressionEvent(eventType, i, i, rawMessages, null, trEventMetadata);
1316+
1317+
rawMessages.set(i, replacementToolMsg);
1318+
hasOffloaded = true;
1319+
}
1320+
}
1321+
continue;
1322+
}
1323+
12311324
String uuid = null;
12321325
// Check if message content exceeds threshold
12331326
if (textContent != null && textContent.length() > threshold) {

agentscope-extensions/agentscope-extensions-autocontext-memory/src/test/java/io/agentscope/core/memory/autocontext/AutoContextMemoryTest.java

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1631,6 +1631,271 @@ void testGetPlanStateContextWithDoneSubtaskWithoutOutcome() throws Exception {
16311631
"Should contain expected outcome");
16321632
}
16331633

1634+
// ==================== Tool Call Pairing Safety Tests ====================
1635+
1636+
@Test
1637+
@DisplayName(
1638+
"Should NOT offload ASSISTANT tool-call message as plain TextBlock stub during large"
1639+
+ " payload offloading (Strategy 2/3)")
1640+
void testLargePayloadOffloadingSkipsAssistantToolUseMessage() {
1641+
// Regression test for: DashScope 400 "messages with role 'tool' must be a response to a
1642+
// preceding message with 'tool_calls'".
1643+
// When an ASSISTANT message carrying ToolUseBlock is large and gets offloaded as a plain
1644+
// TextBlock stub, the downstream TOOL result messages become orphaned.
1645+
TestModel model = new TestModel("Summary");
1646+
AutoContextConfig cfg =
1647+
AutoContextConfig.builder()
1648+
.msgThreshold(5)
1649+
.largePayloadThreshold(50) // low threshold so the large message triggers
1650+
.lastKeep(2)
1651+
.minConsecutiveToolMessages(100) // disable Strategy 1
1652+
.minCompressionTokenThreshold(Integer.MAX_VALUE) // disable LLM compression
1653+
.build();
1654+
AutoContextMemory mem = new AutoContextMemory(cfg, model);
1655+
1656+
// Round 0: user → large ASSISTANT tool-call → TOOL result → ASSISTANT final
1657+
mem.addMessage(createTextMessage("User query", MsgRole.USER));
1658+
1659+
// Build a large ASSISTANT tool-use message (> largePayloadThreshold)
1660+
String largeInput = "x".repeat(200);
1661+
Msg largeToolUseMsg =
1662+
Msg.builder()
1663+
.role(MsgRole.ASSISTANT)
1664+
.name("assistant")
1665+
.content(
1666+
ToolUseBlock.builder()
1667+
.id("call_large")
1668+
.name("search")
1669+
.input(Map.of("query", largeInput))
1670+
.build())
1671+
.build();
1672+
mem.addMessage(largeToolUseMsg);
1673+
mem.addMessage(createToolResultMessage("search", "call_large", "tool output"));
1674+
mem.addMessage(createTextMessage("Assistant final response", MsgRole.ASSISTANT));
1675+
1676+
// Extra messages to push over msgThreshold
1677+
mem.addMessage(createTextMessage("Follow-up user question", MsgRole.USER));
1678+
mem.addMessage(createTextMessage("Follow-up assistant answer", MsgRole.ASSISTANT));
1679+
1680+
boolean compressed = mem.compressIfNeeded();
1681+
List<Msg> messages = mem.getMessages();
1682+
1683+
// Key assertion: the ASSISTANT message that had ToolUseBlock must still carry
1684+
// a ToolUseBlock (not be degraded to a plain TextBlock stub).
1685+
// If it were stripped, the subsequent TOOL message would be orphaned.
1686+
boolean hasOrphanedToolMsg = false;
1687+
for (int i = 0; i < messages.size(); i++) {
1688+
Msg msg = messages.get(i);
1689+
if (MsgUtils.isToolResultMessage(msg)) {
1690+
// The message immediately before a TOOL result must be ASSISTANT with tool_calls
1691+
// OR another TOOL result (parallel calls). It must NOT be a non-tool-call msg.
1692+
boolean precededByToolCall = false;
1693+
for (int j = i - 1; j >= 0; j--) {
1694+
Msg prev = messages.get(j);
1695+
if (MsgUtils.isToolUseMessage(prev)) {
1696+
precededByToolCall = true;
1697+
break;
1698+
}
1699+
if (MsgUtils.isToolResultMessage(prev)) {
1700+
// Consecutive TOOL results from the same assistant tool-call message
1701+
continue;
1702+
}
1703+
// Hit a non-tool message before finding a tool-call → orphaned
1704+
break;
1705+
}
1706+
if (!precededByToolCall) {
1707+
hasOrphanedToolMsg = true;
1708+
}
1709+
}
1710+
}
1711+
assertFalse(
1712+
hasOrphanedToolMsg,
1713+
"TOOL result messages must always be preceded by an ASSISTANT tool-call message."
1714+
+ " Offloading the ASSISTANT tool-call as a plain stub orphans them.");
1715+
}
1716+
1717+
@Test
1718+
@DisplayName(
1719+
"Should offload large TOOL result output while preserving ToolResultBlock id and name")
1720+
void testLargeToolResultOffloadPreservesIdAndName() {
1721+
// When a TOOL result message is large, Strategy 2/3 should compress its output text
1722+
// but MUST preserve the ToolResultBlock structure (id, name) so the API formatter
1723+
// can still emit the correct tool_call_id / name fields.
1724+
TestModel model = new TestModel("Summary");
1725+
AutoContextConfig cfg =
1726+
AutoContextConfig.builder()
1727+
.msgThreshold(5)
1728+
.largePayloadThreshold(50) // low threshold
1729+
.lastKeep(2)
1730+
.minConsecutiveToolMessages(100) // disable Strategy 1
1731+
.minCompressionTokenThreshold(Integer.MAX_VALUE) // disable LLM compression
1732+
.build();
1733+
AutoContextMemory mem = new AutoContextMemory(cfg, model);
1734+
1735+
// Round 0: user → ASSISTANT tool-call → large TOOL result → ASSISTANT final
1736+
mem.addMessage(createTextMessage("User query", MsgRole.USER));
1737+
mem.addMessage(createToolUseMessage("search", "call_tool_id_001"));
1738+
1739+
// Build a large TOOL result message (> largePayloadThreshold)
1740+
String largeOutput = "y".repeat(200);
1741+
Msg largeToolResultMsg =
1742+
Msg.builder()
1743+
.role(MsgRole.TOOL)
1744+
.name("search")
1745+
.content(
1746+
ToolResultBlock.builder()
1747+
.id("call_tool_id_001")
1748+
.name("search")
1749+
.output(
1750+
List.of(
1751+
TextBlock.builder()
1752+
.text(largeOutput)
1753+
.build()))
1754+
.build())
1755+
.build();
1756+
mem.addMessage(largeToolResultMsg);
1757+
mem.addMessage(createTextMessage("Assistant final response", MsgRole.ASSISTANT));
1758+
1759+
// Extra messages to push over msgThreshold
1760+
mem.addMessage(createTextMessage("Follow-up user question", MsgRole.USER));
1761+
mem.addMessage(createTextMessage("Follow-up assistant answer", MsgRole.ASSISTANT));
1762+
1763+
mem.compressIfNeeded();
1764+
List<Msg> messages = mem.getMessages();
1765+
1766+
// Find the (possibly compressed) TOOL result message
1767+
Msg toolResultMsg =
1768+
messages.stream().filter(MsgUtils::isToolResultMessage).findFirst().orElse(null);
1769+
1770+
// If the TOOL message was offloaded (compressed), it must still carry ToolResultBlock
1771+
// with the original id and name intact.
1772+
if (toolResultMsg != null) {
1773+
ToolResultBlock block = toolResultMsg.getFirstContentBlock(ToolResultBlock.class);
1774+
assertNotNull(
1775+
block,
1776+
"Compressed TOOL result message must still contain a ToolResultBlock"
1777+
+ " (not be degraded to plain TextBlock)");
1778+
assertEquals(
1779+
"call_tool_id_001",
1780+
block.getId(),
1781+
"ToolResultBlock id must be preserved after offloading");
1782+
assertEquals(
1783+
"search",
1784+
block.getName(),
1785+
"ToolResultBlock name must be preserved after offloading");
1786+
// The output should now contain the offload hint
1787+
String outputText =
1788+
block.getOutput().stream()
1789+
.filter(b -> b instanceof TextBlock)
1790+
.map(b -> ((TextBlock) b).getText())
1791+
.findFirst()
1792+
.orElse("");
1793+
assertTrue(
1794+
outputText.contains("CONTEXT_OFFLOAD"),
1795+
"Compressed tool result output should contain offload hint. Got: "
1796+
+ outputText);
1797+
}
1798+
1799+
// Also verify no orphaned TOOL messages exist
1800+
for (int i = 0; i < messages.size(); i++) {
1801+
Msg msg = messages.get(i);
1802+
if (MsgUtils.isToolResultMessage(msg)) {
1803+
boolean precededByToolCall = false;
1804+
for (int j = i - 1; j >= 0; j--) {
1805+
Msg prev = messages.get(j);
1806+
if (MsgUtils.isToolUseMessage(prev)) {
1807+
precededByToolCall = true;
1808+
break;
1809+
}
1810+
if (MsgUtils.isToolResultMessage(prev)) {
1811+
continue;
1812+
}
1813+
break;
1814+
}
1815+
assertTrue(
1816+
precededByToolCall,
1817+
"Every TOOL result must be preceded by an ASSISTANT tool-call message");
1818+
}
1819+
}
1820+
}
1821+
1822+
@Test
1823+
@DisplayName(
1824+
"Should maintain valid tool_calls/tool_result pairing after offloading large plain"
1825+
+ " messages in a mixed conversation")
1826+
void testToolCallPairingIntegrityAfterMixedOffloading() {
1827+
// Simulates the production scenario from the bug report:
1828+
// A long conversation with multiple tool-call rounds plus large plain messages.
1829+
// After Strategy 2/3 runs, every TOOL result must still follow an ASSISTANT tool-call.
1830+
TestModel model = new TestModel("Summary");
1831+
AutoContextConfig cfg =
1832+
AutoContextConfig.builder()
1833+
.msgThreshold(8)
1834+
.largePayloadThreshold(50)
1835+
.lastKeep(3)
1836+
.minConsecutiveToolMessages(100) // disable Strategy 1
1837+
.minCompressionTokenThreshold(Integer.MAX_VALUE) // disable LLM compression
1838+
.build();
1839+
AutoContextMemory mem = new AutoContextMemory(cfg, model);
1840+
1841+
// Round 0: normal tool call round (small output)
1842+
mem.addMessage(createTextMessage("User asks tool", MsgRole.USER));
1843+
mem.addMessage(createToolUseMessage("tool_a", "id_a1"));
1844+
mem.addMessage(createToolResultMessage("tool_a", "id_a1", "small result"));
1845+
mem.addMessage(createTextMessage("Assistant reply 0", MsgRole.ASSISTANT));
1846+
1847+
// Round 1: large USER message + tool call round
1848+
String largeUserText = "L".repeat(200);
1849+
mem.addMessage(
1850+
createTextMessage(largeUserText, MsgRole.USER)); // large – candidate for offload
1851+
mem.addMessage(createToolUseMessage("tool_b", "id_b1"));
1852+
mem.addMessage(createToolResultMessage("tool_b", "id_b1", "result b"));
1853+
mem.addMessage(createTextMessage("Assistant reply 1", MsgRole.ASSISTANT));
1854+
1855+
// Round 2: current (protected by lastKeep)
1856+
mem.addMessage(createTextMessage("Current user question", MsgRole.USER));
1857+
mem.addMessage(createTextMessage("Current assistant answer", MsgRole.ASSISTANT));
1858+
1859+
mem.compressIfNeeded();
1860+
List<Msg> messages = mem.getMessages();
1861+
1862+
// Invariant: for every TOOL result, scan backwards and find an ASSISTANT tool-call
1863+
// before hitting any non-tool message.
1864+
for (int i = 0; i < messages.size(); i++) {
1865+
if (!MsgUtils.isToolResultMessage(messages.get(i))) {
1866+
continue;
1867+
}
1868+
boolean found = false;
1869+
for (int j = i - 1; j >= 0; j--) {
1870+
Msg prev = messages.get(j);
1871+
if (MsgUtils.isToolUseMessage(prev)) {
1872+
found = true;
1873+
break;
1874+
}
1875+
if (MsgUtils.isToolResultMessage(prev)) {
1876+
continue; // parallel tool results
1877+
}
1878+
break;
1879+
}
1880+
assertTrue(
1881+
found,
1882+
"TOOL result at index "
1883+
+ i
1884+
+ " is orphaned – no preceding ASSISTANT tool-call found."
1885+
+ " Full message sequence: "
1886+
+ messages.stream()
1887+
.map(
1888+
m ->
1889+
m.getRole()
1890+
+ "(toolUse="
1891+
+ MsgUtils.isToolUseMessage(m)
1892+
+ ",toolResult="
1893+
+ MsgUtils.isToolResultMessage(m)
1894+
+ ")")
1895+
.toList());
1896+
}
1897+
}
1898+
16341899
@Test
16351900
@DisplayName("Should return plan context with different plan states")
16361901
void testGetPlanStateContextWithDifferentPlanStates() throws Exception {

0 commit comments

Comments
 (0)