Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,10 @@ describe("Context Management", () => {
expect(result.messagesRemoved).toBe(2) // With 4 messages after first, 0.5 fraction = 2 to remove
expect(result.summary).toBe("")
expect(result.cost).toBe(0)
expect(result.prevContextTokens).toBe(totalTokens)
// prevContextTokens is now locally counted (same method as newContextTokensAfterTruncation)
// to ensure consistent before/after display (#11990)
expect(result.prevContextTokens).toBeGreaterThan(0)
expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0)
// Should have all original messages + truncation marker (non-destructive)
expect(result.messages.length).toBe(6) // 5 original + 1 marker
})
Expand Down Expand Up @@ -505,7 +508,11 @@ describe("Context Management", () => {
expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate
expect(resultWithLarge.summary).toBe("")
expect(resultWithLarge.cost).toBe(0)
expect(resultWithLarge.prevContextTokens).toBe(baseTokensForLarge + largeContentTokens)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(resultWithLarge.prevContextTokens).toBeGreaterThan(0)
expect(resultWithLarge.prevContextTokens).toBeGreaterThanOrEqual(
resultWithLarge.newContextTokensAfterTruncation ?? 0,
)

// Test case 3: Very large content that will definitely exceed threshold
const veryLargeContent = [{ type: "text" as const, text: "X".repeat(1000) }]
Expand Down Expand Up @@ -533,7 +540,11 @@ describe("Context Management", () => {
expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate
expect(resultWithVeryLarge.summary).toBe("")
expect(resultWithVeryLarge.cost).toBe(0)
expect(resultWithVeryLarge.prevContextTokens).toBe(baseTokensForVeryLarge + veryLargeContentTokens)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(resultWithVeryLarge.prevContextTokens).toBeGreaterThan(0)
expect(resultWithVeryLarge.prevContextTokens).toBeGreaterThanOrEqual(
resultWithVeryLarge.newContextTokensAfterTruncation ?? 0,
)
})

it("should truncate if tokens are within TOKEN_BUFFER_PERCENTAGE of the threshold", async () => {
Expand Down Expand Up @@ -566,7 +577,9 @@ describe("Context Management", () => {
expect(result.messagesRemoved).toBe(2) // With 4 messages after first, 0.5 fraction = 2 to remove
expect(result.summary).toBe("")
expect(result.cost).toBe(0)
expect(result.prevContextTokens).toBe(totalTokens)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(result.prevContextTokens).toBeGreaterThan(0)
expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0)
// Should have all original messages + truncation marker (non-destructive)
expect(result.messages.length).toBe(6) // 5 original + 1 marker
})
Expand Down Expand Up @@ -682,7 +695,9 @@ describe("Context Management", () => {
expect(result.truncationId).toBeDefined()
expect(result.messagesRemoved).toBe(2)
expect(result.summary).toBe("")
expect(result.prevContextTokens).toBe(totalTokens)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(result.prevContextTokens).toBeGreaterThan(0)
expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0)
// Should have all original messages + truncation marker
expect(result.messages.length).toBe(6) // 5 original + 1 marker
// The cost might be different than expected, so we don't check it
Expand Down Expand Up @@ -733,7 +748,9 @@ describe("Context Management", () => {
expect(result.messagesRemoved).toBe(2)
expect(result.summary).toBe("")
expect(result.cost).toBe(0)
expect(result.prevContextTokens).toBe(totalTokens)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(result.prevContextTokens).toBeGreaterThan(0)
expect(result.prevContextTokens).toBeGreaterThanOrEqual(result.newContextTokensAfterTruncation ?? 0)
// Should have all original messages + truncation marker
expect(result.messages.length).toBe(6) // 5 original + 1 marker

Expand Down Expand Up @@ -1336,7 +1353,9 @@ describe("Context Management", () => {
expect(result2.messagesRemoved).toBe(2)
expect(result2.summary).toBe("")
expect(result2.cost).toBe(0)
expect(result2.prevContextTokens).toBe(50001)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(result2.prevContextTokens).toBeGreaterThan(0)
expect(result2.prevContextTokens).toBeGreaterThanOrEqual(result2.newContextTokensAfterTruncation ?? 0)
})

it("should use ANTHROPIC_DEFAULT_MAX_TOKENS as buffer when maxTokens is undefined", async () => {
Expand Down Expand Up @@ -1391,7 +1410,9 @@ describe("Context Management", () => {
expect(result2.truncationId).toBeDefined()
expect(result2.summary).toBe("")
expect(result2.cost).toBe(0)
expect(result2.prevContextTokens).toBe(81809)
// prevContextTokens is locally counted in truncation path for consistent display (#11990)
expect(result2.prevContextTokens).toBeGreaterThan(0)
expect(result2.prevContextTokens).toBeGreaterThanOrEqual(result2.newContextTokensAfterTruncation ?? 0)
})

it("should handle small context windows appropriately", async () => {
Expand Down
28 changes: 22 additions & 6 deletions src/core/context-management/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,26 @@ export async function manageContext({

// Fall back to sliding window truncation if needed
if (prevContextTokens > allowedTokens) {
// Calculate "before" token count using local estimation so it's consistent
// with the "after" count. Using API-reported values for "before" and local
// estimation for "after" can produce inconsistent results where "after" appears
// larger than "before" (see #11990).
const visibleMessagesBeforeTruncation = messages.filter(
(msg) => !msg.truncationParent && !msg.isTruncationMarker,
)

const systemPromptTokens = await estimateTokenCount([{ type: "text", text: systemPrompt }], apiHandler)

let localPrevContextTokens = systemPromptTokens
for (const msg of visibleMessagesBeforeTruncation) {
const content = msg.content
if (Array.isArray(content)) {
localPrevContextTokens += await estimateTokenCount(content, apiHandler)
} else if (typeof content === "string") {
localPrevContextTokens += await estimateTokenCount([{ type: "text", text: content }], apiHandler)
}
}

const truncationResult = truncateConversation(messages, 0.5, taskId)

// Calculate new context tokens after truncation by counting non-truncated messages
Expand All @@ -341,11 +361,7 @@ export async function manageContext({
)

// Include system prompt tokens so this value matches what we send to the API.
// Note: `prevContextTokens` is computed locally here (totalTokens + lastMessageTokens).
let newContextTokensAfterTruncation = await estimateTokenCount(
[{ type: "text", text: systemPrompt }],
apiHandler,
)
let newContextTokensAfterTruncation = systemPromptTokens

for (const msg of effectiveMessages) {
const content = msg.content
Expand All @@ -361,7 +377,7 @@ export async function manageContext({

return {
messages: truncationResult.messages,
prevContextTokens,
prevContextTokens: localPrevContextTokens,
summary: "",
cost,
error,
Expand Down
Loading