diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts index 9950ec536b3..aa574d87429 100644 --- a/src/core/context-management/__tests__/context-management.spec.ts +++ b/src/core/context-management/__tests__/context-management.spec.ts @@ -343,7 +343,10 @@ describe("Context Management", () => { expect(result.messagesRemoved).toBe(2) // With 4 messages after first, 0.5 fraction = 2 to remove expect(result.summary).toBe("") expect(result.cost).toBe(0) - expect(result.prevContextTokens).toBe(totalTokens) + // prevContextTokens is now locally counted (same method as newContextTokensAfterTruncation) + // to ensure consistent before/after display (#11990) + expect(result.prevContextTokens).toBeGreaterThan(0) + expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0) // Should have all original messages + truncation marker (non-destructive) expect(result.messages.length).toBe(6) // 5 original + 1 marker }) @@ -505,7 +508,11 @@ describe("Context Management", () => { expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate expect(resultWithLarge.summary).toBe("") expect(resultWithLarge.cost).toBe(0) - expect(resultWithLarge.prevContextTokens).toBe(baseTokensForLarge + largeContentTokens) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(resultWithLarge.prevContextTokens).toBeGreaterThan(0) + expect(resultWithLarge.prevContextTokens).toBeGreaterThanOrEqual( + resultWithLarge.newContextTokensAfterTruncation ?? 0, + ) // Test case 3: Very large content that will definitely exceed threshold const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }] @@ -533,7 +540,11 @@ describe("Context Management", () => { expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate expect(resultWithVeryLarge.summary).toBe("") expect(resultWithVeryLarge.cost).toBe(0) - expect(resultWithVeryLarge.prevContextTokens).toBe(baseTokensForVeryLarge + veryLargeContentTokens) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(resultWithVeryLarge.prevContextTokens).toBeGreaterThan(0) + expect(resultWithVeryLarge.prevContextTokens).toBeGreaterThanOrEqual( + resultWithVeryLarge.newContextTokensAfterTruncation ?? 0, + ) }) it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => { @@ -566,7 +577,9 @@ describe("Context Management", () => { expect(result.messagesRemoved).toBe(2) // With 4 messages after first, 0.5 fraction = 2 to remove expect(result.summary).toBe("") expect(result.cost).toBe(0) - expect(result.prevContextTokens).toBe(totalTokens) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(result.prevContextTokens).toBeGreaterThan(0) + expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0) // Should have all original messages + truncation marker (non-destructive) expect(result.messages.length).toBe(6) // 5 original + 1 marker }) @@ -682,7 +695,9 @@ describe("Context Management", () => { expect(result.truncationId).toBeDefined() expect(result.messagesRemoved).toBe(2) expect(result.summary).toBe("") - expect(result.prevContextTokens).toBe(totalTokens) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(result.prevContextTokens).toBeGreaterThan(0) + expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0) // Should have all original messages + truncation marker expect(result.messages.length).toBe(6) // 5 original + 1 marker // The cost might be different than expected, so we don't check it @@ -733,7 +748,9 @@ describe("Context Management", () => { expect(result.messagesRemoved).toBe(2) expect(result.summary).toBe("") expect(result.cost).toBe(0) - expect(result.prevContextTokens).toBe(totalTokens) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(result.prevContextTokens).toBeGreaterThan(0) + expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0) // Should have all original messages + truncation marker expect(result.messages.length).toBe(6) // 5 original + 1 marker @@ -1336,7 +1353,9 @@ describe("Context Management", () => { expect(result2.messagesRemoved).toBe(2) expect(result2.summary).toBe("") expect(result2.cost).toBe(0) - expect(result2.prevContextTokens).toBe(50001) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(result2.prevContextTokens).toBeGreaterThan(0) + expect(result2.prevContextTokens).toBeGreaterThanOrEqual(result2.newContextTokensAfterTruncation ?? 0) }) it("should use ANTHROPIC_DEFAULT_MAX_TOKENS as buffer when maxTokens is undefined", async () => { @@ -1391,7 +1410,9 @@ describe("Context Management", () => { expect(result2.truncationId).toBeDefined() expect(result2.summary).toBe("") expect(result2.cost).toBe(0) - expect(result2.prevContextTokens).toBe(81809) + // prevContextTokens is locally counted in truncation path for consistent display (#11990) + expect(result2.prevContextTokens).toBeGreaterThan(0) + expect(result2.prevContextTokens).toBeGreaterThanOrEqual(result2.newContextTokensAfterTruncation ?? 0) }) it("should handle small context windows appropriately", async () => { diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts index 243d7bd797f..d9caa2ceb28 100644 --- a/src/core/context-management/index.ts +++ b/src/core/context-management/index.ts @@ -332,6 +332,26 @@ export async function manageContext({ // Fall back to sliding window truncation if needed if (prevContextTokens > allowedTokens) { + // Calculate "before" token count using local estimation so it's consistent + // with the "after" count. Using API-reported values for "before" and local + // estimation for "after" can produce inconsistent results where "after" appears + // larger than "before" (see #11990). + const visibleMessagesBeforeTruncation = messages.filter( + (msg) => !msg.truncationParent && !msg.isTruncationMarker, + ) + + const systemPromptTokens = await estimateTokenCount([{ type: "text", text: systemPrompt }], apiHandler) + + let localPrevContextTokens = systemPromptTokens + for (const msg of visibleMessagesBeforeTruncation) { + const content = msg.content + if (Array.isArray(content)) { + localPrevContextTokens += await estimateTokenCount(content, apiHandler) + } else if (typeof content === "string") { + localPrevContextTokens += await estimateTokenCount([{ type: "text", text: content }], apiHandler) + } + } + const truncationResult = truncateConversation(messages, 0.5, taskId) // Calculate new context tokens after truncation by counting non-truncated messages @@ -341,11 +361,7 @@ export async function manageContext({ ) // Include system prompt tokens so this value matches what we send to the API. - // Note: `prevContextTokens` is computed locally here (totalTokens + lastMessageTokens). - let newContextTokensAfterTruncation = await estimateTokenCount( - [{ type: "text", text: systemPrompt }], - apiHandler, - ) + let newContextTokensAfterTruncation = systemPromptTokens for (const msg of effectiveMessages) { const content = msg.content @@ -361,7 +377,7 @@ export async function manageContext({ return { messages: truncationResult.messages, - prevContextTokens, + prevContextTokens: localPrevContextTokens, summary: "", cost, error,